#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
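
  Example of a minimal, do-nothing subclass (an illustrative sketch only;
  C{LUNoop} is a hypothetical name, based on L{NoHooksLU} defined below)::

    class LUNoop(NoHooksLU):
      _OP_REQP = []

      def ExpandNames(self):
        self.needed_locks = {}

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        return True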

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by the opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.
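
    Example (an illustrative sketch; "force" stands for any optional
    boolean opcode parameter)::

      if not hasattr(self.op, "force"):
        self.op.force = False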

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level, omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level, use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level, you can modify self.share_locks, setting a true value (usually 1)
    for that level. By default, locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.
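
    Example (a minimal sketch of an override for an LU whose node locks
    depend on the instance locks acquired at the previous level)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()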

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have the 'GANETI_' prefix, as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, an empty list (and not None) should
    be used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.
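
    Example (an illustrative sketch, mirroring the cluster-level LUs below
    that run their post hook only on the master node)::

      env = {"OP_TARGET": self.cfg.GetClusterName()}
      mn = self.cfg.GetMasterNode()
      return env, [], [mn]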

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.
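
    Example (an illustrative sketch of an override that only reports the
    post-phase hook results and keeps the result unchanged)::

      if phase == constants.HOOKS_PHASE_POST:
        feedback_fn("Hook results: %s" % (hook_results, ))
      return lu_result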

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes,
    or to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

395
  """
396
  def __init__(self, lu):
397
    self.lu = lu
398

    
399
    # Shortcuts
400
    self.cfg = lu.cfg
401
    self.rpc = lu.rpc
402

    
403
  def CheckPrereq(self):
404
    """Check prerequisites for this tasklets.
405

406
    This method should check whether the prerequisites for the execution of
407
    this tasklet are fulfilled. It can do internode communication, but it
408
    should be idempotent - no cluster or system changes are allowed.
409

410
    The method should raise errors.OpPrereqError in case something is not
411
    fulfilled. Its return value is ignored.
412

413
    This method should also update all parameters to their canonical form if it
414
    hasn't been done before.
415

416
    """
417
    raise NotImplementedError
418

    
419
  def Exec(self, feedback_fn):
420
    """Execute the tasklet.
421

422
    This method should implement the actual work. It should raise
423
    errors.OpExecError for failures that are somewhat dealt with in code, or
424
    expected.
425

426
    """
427
    raise NotImplementedError
428

    
429

    
430
def _GetWantedNodes(lu, nodes):
431
  """Returns list of checked and expanded node names.
432

433
  @type lu: L{LogicalUnit}
434
  @param lu: the logical unit on whose behalf we execute
435
  @type nodes: list
436
  @param nodes: list of node names or None for all nodes
437
  @rtype: list
438
  @return: the list of nodes, sorted
439
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
440

441
  """
442
  if not isinstance(nodes, list):
443
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
444
                               errors.ECODE_INVAL)
445

    
446
  if not nodes:
447
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
448
      " non-empty list of nodes whose name is to be expanded.")
449

    
450
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
451
  return utils.NiceSort(wanted)
452

    
453

    
454
def _GetWantedInstances(lu, instances):
455
  """Returns list of checked and expanded instance names.
456

457
  @type lu: L{LogicalUnit}
458
  @param lu: the logical unit on whose behalf we execute
459
  @type instances: list
460
  @param instances: list of instance names or None for all instances
461
  @rtype: list
462
  @return: the list of instances, sorted
463
  @raise errors.OpPrereqError: if the instances parameter is wrong type
464
  @raise errors.OpPrereqError: if any of the passed instances is not found
465

466
  """
467
  if not isinstance(instances, list):
468
    raise errors.OpPrereqError("Invalid argument type 'instances'",
469
                               errors.ECODE_INVAL)
470

    
471
  if instances:
472
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
473
  else:
474
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
475
  return wanted
476

    
477

    
478
def _GetUpdatedParams(old_params, update_dict,
479
                      use_default=True, use_none=False):
480
  """Return the new version of a parameter dictionary.
481

482
  @type old_params: dict
483
  @param old_params: old parameters
484
  @type update_dict: dict
485
  @param update_dict: dict containing new parameter values, or
486
      constants.VALUE_DEFAULT to reset the parameter to its default
487
      value
488
  @param use_default: boolean
489
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
490
      values as 'to be deleted' values
491
  @param use_none: boolean
492
  @type use_none: whether to recognise C{None} values as 'to be
493
      deleted' values
494
  @rtype: dict
495
  @return: the new parameter dictionary
496

497
  """
498
  params_copy = copy.deepcopy(old_params)
499
  for key, val in update_dict.iteritems():
500
    if ((use_default and val == constants.VALUE_DEFAULT) or
501
        (use_none and val is None)):
502
      try:
503
        del params_copy[key]
504
      except KeyError:
505
        pass
506
    else:
507
      params_copy[key] = val
508
  return params_copy
509

    
510

    
511
def _CheckOutputFields(static, dynamic, selected):
512
  """Checks whether all selected fields are valid.
513

514
  @type static: L{utils.FieldSet}
515
  @param static: static fields set
516
  @type dynamic: L{utils.FieldSet}
517
  @param dynamic: dynamic fields set
518

519
  """
520
  f = utils.FieldSet()
521
  f.Extend(static)
522
  f.Extend(dynamic)
523

    
524
  delta = f.NonMatching(selected)
525
  if delta:
526
    raise errors.OpPrereqError("Unknown output fields selected: %s"
527
                               % ",".join(delta), errors.ECODE_INVAL)
528

    
529

    
530
def _CheckBooleanOpField(op, name):
531
  """Validates boolean opcode parameters.
532

533
  This will ensure that an opcode parameter is either a boolean value,
534
  or None (but that it always exists).
535

536
  """
537
  val = getattr(op, name, None)
538
  if not (val is None or isinstance(val, bool)):
539
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
540
                               (name, str(val)), errors.ECODE_INVAL)
541
  setattr(op, name, val)
542

    
543

    
544
def _CheckGlobalHvParams(params):
545
  """Validates that given hypervisor params are not global ones.
546

547
  This will ensure that instances don't get customised versions of
548
  global params.
549

550
  """
551
  used_globals = constants.HVC_GLOBALS.intersection(params)
552
  if used_globals:
553
    msg = ("The following hypervisor parameters are global and cannot"
554
           " be customized at instance level, please modify them at"
555
           " cluster level: %s" % utils.CommaJoin(used_globals))
556
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
557

    
558

    
559
def _CheckNodeOnline(lu, node):
560
  """Ensure that a given node is online.
561

562
  @param lu: the LU on behalf of which we make the check
563
  @param node: the node to check
564
  @raise errors.OpPrereqError: if the node is offline
565

566
  """
567
  if lu.cfg.GetNodeInfo(node).offline:
568
    raise errors.OpPrereqError("Can't use offline node %s" % node,
569
                               errors.ECODE_INVAL)
570

    
571

    
572
def _CheckNodeNotDrained(lu, node):
573
  """Ensure that a given node is not drained.
574

575
  @param lu: the LU on behalf of which we make the check
576
  @param node: the node to check
577
  @raise errors.OpPrereqError: if the node is drained
578

579
  """
580
  if lu.cfg.GetNodeInfo(node).drained:
581
    raise errors.OpPrereqError("Can't use drained node %s" % node,
582
                               errors.ECODE_INVAL)
583

    
584

    
585
def _CheckNodeHasOS(lu, node, os_name, force_variant):
586
  """Ensure that a node supports a given OS.
587

588
  @param lu: the LU on behalf of which we make the check
589
  @param node: the node to check
590
  @param os_name: the OS to query about
591
  @param force_variant: whether to ignore variant errors
592
  @raise errors.OpPrereqError: if the node is not supporting the OS
593

594
  """
595
  result = lu.rpc.call_os_get(node, os_name)
596
  result.Raise("OS '%s' not in supported OS list for node %s" %
597
               (os_name, node),
598
               prereq=True, ecode=errors.ECODE_INVAL)
599
  if not force_variant:
600
    _CheckOSVariant(result.payload, os_name)
601

    
602

    
603
def _RequireFileStorage():
604
  """Checks that file storage is enabled.
605

606
  @raise errors.OpPrereqError: when file storage is disabled
607

608
  """
609
  if not constants.ENABLE_FILE_STORAGE:
610
    raise errors.OpPrereqError("File storage disabled at configure time",
611
                               errors.ECODE_INVAL)
612

    
613

    
614
def _CheckDiskTemplate(template):
615
  """Ensure a given disk template is valid.
616

617
  """
618
  if template not in constants.DISK_TEMPLATES:
619
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
620
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
621
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
622
  if template == constants.DT_FILE:
623
    _RequireFileStorage()
624

    
625

    
626
def _CheckStorageType(storage_type):
627
  """Ensure a given storage type is valid.
628

629
  """
630
  if storage_type not in constants.VALID_STORAGE_TYPES:
631
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
632
                               errors.ECODE_INVAL)
633
  if storage_type == constants.ST_FILE:
634
    _RequireFileStorage()
635

    
636

    
637
def _GetClusterDomainSecret():
638
  """Reads the cluster domain secret.
639

640
  """
641
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
642
                               strict=True)
643

    
644

    
645
def _CheckInstanceDown(lu, instance, reason):
646
  """Ensure that an instance is not running."""
647
  if instance.admin_up:
648
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
649
                               (instance.name, reason), errors.ECODE_STATE)
650

    
651
  pnode = instance.primary_node
652
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
653
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
654
              prereq=True, ecode=errors.ECODE_ENVIRON)
655

    
656
  if instance.name in ins_l.payload:
657
    raise errors.OpPrereqError("Instance %s is running, %s" %
658
                               (instance.name, reason), errors.ECODE_STATE)
659

    
660

    
661
def _ExpandItemName(fn, name, kind):
662
  """Expand an item name.
663

664
  @param fn: the function to use for expansion
665
  @param name: requested item name
666
  @param kind: text description ('Node' or 'Instance')
667
  @return: the resolved (full) name
668
  @raise errors.OpPrereqError: if the item is not found
669

670
  """
671
  full_name = fn(name)
672
  if full_name is None:
673
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
674
                               errors.ECODE_NOENT)
675
  return full_name
676

    
677

    
678
def _ExpandNodeName(cfg, name):
679
  """Wrapper over L{_ExpandItemName} for nodes."""
680
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
681

    
682

    
683
def _ExpandInstanceName(cfg, name):
684
  """Wrapper over L{_ExpandItemName} for instance."""
685
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is a secondary (config)
    @ivar mfree: free memory, as reported by the hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
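
    Example: with the error_codes opcode parameter set, a report line looks
    like "ERROR:ENODEVERSION:node:node1.example.com:<message>", while
    without it the simpler "ERROR: node node1.example.com: <message>" form
    is used (node1.example.com being an illustrative node name).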
1192

1193
    """
1194
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1195
    itype, etxt = ecode
1196
    # first complete the msg
1197
    if args:
1198
      msg = msg % args
1199
    # then format the whole message
1200
    if self.op.error_codes:
1201
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1202
    else:
1203
      if item:
1204
        item = " " + item
1205
      else:
1206
        item = ""
1207
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1208
    # and finally report it via the feedback_fn
1209
    self._feedback_fn("  - %s" % msg)
1210

    
1211
  def _ErrorIf(self, cond, *args, **kwargs):
1212
    """Log an error message if the passed condition is True.
1213

1214
    """
1215
    cond = bool(cond) or self.op.debug_simulate_errors
1216
    if cond:
1217
      self._Error(*args, **kwargs)
1218
    # do not mark the operation as failed for WARN cases only
1219
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1220
      self.bad = self.bad or cond
1221

    
1222
  def _VerifyNode(self, ninfo, nresult):
1223
    """Run multiple tests against a node.
1224

1225
    Test list:
1226

1227
      - compares ganeti version
1228
      - checks vg existence and size > 20G
1229
      - checks config file checksum
1230
      - checks ssh to other nodes
1231

1232
    @type ninfo: L{objects.Node}
1233
    @param ninfo: the node to check
1234
    @param nresult: the results from the node
1235
    @rtype: boolean
1236
    @return: whether overall this call was successful (and we can expect
1237
         reasonable values in the respose)
1238

1239
    """
1240
    node = ninfo.name
1241
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1242

    
1243
    # main result, nresult should be a non-empty dict
1244
    test = not nresult or not isinstance(nresult, dict)
1245
    _ErrorIf(test, self.ENODERPC, node,
1246
                  "unable to verify node: no data returned")
1247
    if test:
1248
      return False
1249

    
1250
    # compares ganeti version
1251
    local_version = constants.PROTOCOL_VERSION
1252
    remote_version = nresult.get("version", None)
1253
    test = not (remote_version and
1254
                isinstance(remote_version, (list, tuple)) and
1255
                len(remote_version) == 2)
1256
    _ErrorIf(test, self.ENODERPC, node,
1257
             "connection to node returned invalid data")
1258
    if test:
1259
      return False
1260

    
1261
    test = local_version != remote_version[0]
1262
    _ErrorIf(test, self.ENODEVERSION, node,
1263
             "incompatible protocol versions: master %s,"
1264
             " node %s", local_version, remote_version[0])
1265
    if test:
1266
      return False
1267

    
1268
    # node seems compatible, we can actually try to look into its results
1269

    
1270
    # full package version
1271
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1272
                  self.ENODEVERSION, node,
1273
                  "software version mismatch: master %s, node %s",
1274
                  constants.RELEASE_VERSION, remote_version[1],
1275
                  code=self.ETYPE_WARNING)
1276

    
1277
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1278
    if isinstance(hyp_result, dict):
1279
      for hv_name, hv_result in hyp_result.iteritems():
1280
        test = hv_result is not None
1281
        _ErrorIf(test, self.ENODEHV, node,
1282
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1283

    
1284

    
1285
    test = nresult.get(constants.NV_NODESETUP,
1286
                           ["Missing NODESETUP results"])
1287
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1288
             "; ".join(test))
1289

    
1290
    return True
1291

    
1292
  def _VerifyNodeTime(self, ninfo, nresult,
1293
                      nvinfo_starttime, nvinfo_endtime):
1294
    """Check the node time.
1295

1296
    @type ninfo: L{objects.Node}
1297
    @param ninfo: the node to check
1298
    @param nresult: the remote results for the node
1299
    @param nvinfo_starttime: the start time of the RPC call
1300
    @param nvinfo_endtime: the end time of the RPC call
1301

1302
    """
1303
    node = ninfo.name
1304
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1305

    
1306
    ntime = nresult.get(constants.NV_TIME, None)
1307
    try:
1308
      ntime_merged = utils.MergeTime(ntime)
1309
    except (ValueError, TypeError):
1310
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1311
      return
1312

    
1313
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1314
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1315
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1316
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1317
    else:
1318
      ntime_diff = None
1319

    
1320
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1321
             "Node time diverges by at least %s from master node time",
1322
             ntime_diff)
1323

    
1324
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1325
    """Check the node time.
1326

1327
    @type ninfo: L{objects.Node}
1328
    @param ninfo: the node to check
1329
    @param nresult: the remote results for the node
1330
    @param vg_name: the configured VG name
1331

1332
    """
1333
    if vg_name is None:
1334
      return
1335

    
1336
    node = ninfo.name
1337
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1338

    
1339
    # checks vg existence and size > 20G
1340
    vglist = nresult.get(constants.NV_VGLIST, None)
1341
    test = not vglist
1342
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1343
    if not test:
1344
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1345
                                            constants.MIN_VG_SIZE)
1346
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1347

    
1348
    # check pv names
1349
    pvlist = nresult.get(constants.NV_PVLIST, None)
1350
    test = pvlist is None
1351
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1352
    if not test:
1353
      # check that ':' is not present in PV names, since it's a
1354
      # special character for lvcreate (denotes the range of PEs to
1355
      # use on the PV)
1356
      for _, pvname, owner_vg in pvlist:
1357
        test = ":" in pvname
1358
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1359
                 " '%s' of VG '%s'", pvname, owner_vg)
1360

    
1361
  def _VerifyNodeNetwork(self, ninfo, nresult):
1362
    """Check the node time.
1363

1364
    @type ninfo: L{objects.Node}
1365
    @param ninfo: the node to check
1366
    @param nresult: the remote results for the node
1367

1368
    """
1369
    node = ninfo.name
1370
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1371

    
1372
    test = constants.NV_NODELIST not in nresult
1373
    _ErrorIf(test, self.ENODESSH, node,
1374
             "node hasn't returned node ssh connectivity data")
1375
    if not test:
1376
      if nresult[constants.NV_NODELIST]:
1377
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1378
          _ErrorIf(True, self.ENODESSH, node,
1379
                   "ssh communication with node '%s': %s", a_node, a_msg)
1380

    
1381
    test = constants.NV_NODENETTEST not in nresult
1382
    _ErrorIf(test, self.ENODENET, node,
1383
             "node hasn't returned node tcp connectivity data")
1384
    if not test:
1385
      if nresult[constants.NV_NODENETTEST]:
1386
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1387
        for anode in nlist:
1388
          _ErrorIf(True, self.ENODENET, node,
1389
                   "tcp communication with node '%s': %s",
1390
                   anode, nresult[constants.NV_NODENETTEST][anode])
1391

    
1392
    test = constants.NV_MASTERIP not in nresult
1393
    _ErrorIf(test, self.ENODENET, node,
1394
             "node hasn't returned node master IP reachability data")
1395
    if not test:
1396
      if not nresult[constants.NV_MASTERIP]:
1397
        if node == self.master_node:
1398
          msg = "the master node cannot reach the master IP (not configured?)"
1399
        else:
1400
          msg = "cannot reach the master IP"
1401
        _ErrorIf(True, self.ENODENET, node, msg)
1402

    
1403

    
1404
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1405
    """Verify an instance.
1406

1407
    This function checks to see if the required block devices are
1408
    available on the instance's node.
1409

1410
    """
1411
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1412
    node_current = instanceconfig.primary_node
1413

    
1414
    node_vol_should = {}
1415
    instanceconfig.MapLVsByNode(node_vol_should)
1416

    
1417
    for node in node_vol_should:
1418
      n_img = node_image[node]
1419
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1420
        # ignore missing volumes on offline or broken nodes
1421
        continue
1422
      for volume in node_vol_should[node]:
1423
        test = volume not in n_img.volumes
1424
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1425
                 "volume %s missing on node %s", volume, node)
1426

    
1427
    if instanceconfig.admin_up:
1428
      pri_img = node_image[node_current]
1429
      test = instance not in pri_img.instances and not pri_img.offline
1430
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1431
               "instance not running on its primary node %s",
1432
               node_current)
1433

    
1434
    for node, n_img in node_image.items():
1435
      if (not node == node_current):
1436
        test = instance in n_img.instances
1437
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1438
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as a
      # secondary has enough memory to host all instances for which it
      # is secondary, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of a list of tuples due to
      # JSON lacking a real tuple type; fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
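    # Illustrative sketch of the resulting structure (hypothetical names):
    #   nimg.oslist = {
    #     "debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                      set(["default"]), set(), set([20]))],
    #   }
    # i.e. OS name -> list of (path, status, diagnose, variants, parameters,
    # api_versions) tuples, one entry per directory the OS was found in.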

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # Manually override lu_result here, as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): inst}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
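    # Illustrative sketch (hypothetical names): after the loops above, nv_dict
    # maps (node, lv_name) to the owning instance object, e.g.
    #   {("node2.example.com", "xenvg/disk0_data"): <inst1 object>}
    # and the returned triple could look like
    #   ({"node3.example.com": "rpc error"}, ["inst2.example.com"],
    #    {"inst1.example.com": [("node2.example.com", "xenvg/disk0_data")]})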


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
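    # Illustrative example (made-up sizes): for a DRBD8 disk recorded at
    # 10240 MiB whose data child LV is recorded at 10112 MiB, the child's
    # recorded size is bumped to 10240 and True is returned, so the caller
    # knows it has to write the updated configuration.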

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
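        # the node reports the size in bytes; shift by 20 bits to convert it
        # to MiB, the unit in which disk.size is recorded in the configuration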
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
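  # Illustrative usage (hypothetical disk objects, not from the original
  # code): a DRBD8 disk whose children are LD_LV devices yields True via the
  # recursion above, while a file-based disk with no children yields False.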


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    for attr in ["candidate_pool_size",
                 "uid_pool", "add_uids", "remove_uids"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
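    # Illustrative sketch (hypothetical input, not from the original code):
    # with an existing cluster.os_hvp of
    #   {"debootstrap": {"xen-pvm": {"kernel_path": "/boot/k1"}}}
    # and a submitted self.op.os_hvp of
    #   {"debootstrap": {"xen-pvm": {"root_path": "/dev/xvda1"}}}
    # the loop above merges them into
    #   {"debootstrap": {"xen-pvm": {"kernel_path": "/boot/k1",
    #                                "root_path": "/dev/xvda1"}}}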

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      if not isinstance(self.op.osparams, dict):
        raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, osp in self.op.osparams.items():
        if not isinstance(osp, dict):
          raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)


  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
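  # Note on the polling cadence above (summary, not original documentation):
  # RPC failures are retried every 6 seconds up to 10 times, a finished but
  # still degraded sync is re-checked up to 10 more times at 1-second
  # intervals, and otherwise the loop sleeps min(60, max_time) seconds between
  # status polls, where max_time is the last estimated completion time (zero
  # if no estimate was reported).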


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
2945
          api_versions.intersection_update(node_api)
2946

    
2947
      for field in self.op.output_fields:
2948
        if field == "name":
2949
          val = os_name
2950
        elif field == "valid":
2951
          val = valid
2952
        elif field == "node_status":
2953
          # this is just a copy of the dict
2954
          val = {}
2955
          for node_name, nos_list in os_data.items():
2956
            val[node_name] = nos_list
2957
        elif field == "variants":
2958
          val = list(variants)
2959
        elif field == "parameters":
2960
          val = list(params)
2961
        elif field == "api_versions":
2962
          val = list(api_versions)
2963
        else:
2964
          raise errors.ParameterError(field)
2965
        row.append(val)
2966
      output.append(row)
2967

    
2968
    return output
2969

    
2970

    
2971
class LURemoveNode(LogicalUnit):
2972
  """Logical unit for removing a node.
2973

2974
  """
2975
  HPATH = "node-remove"
2976
  HTYPE = constants.HTYPE_NODE
2977
  _OP_REQP = ["node_name"]
2978

    
2979
  def BuildHooksEnv(self):
2980
    """Build hooks env.
2981

2982
    This doesn't run on the target node in the pre phase as a failed
2983
    node would then be impossible to remove.
2984

2985
    """
2986
    env = {
2987
      "OP_TARGET": self.op.node_name,
2988
      "NODE_NAME": self.op.node_name,
2989
      }
2990
    all_nodes = self.cfg.GetNodeList()
2991
    try:
2992
      all_nodes.remove(self.op.node_name)
2993
    except ValueError:
2994
      logging.warning("Node %s which is about to be removed not found"
2995
                      " in the all nodes list", self.op.node_name)
2996
    return env, all_nodes, all_nodes
2997

    
2998
  def CheckPrereq(self):
2999
    """Check prerequisites.
3000

3001
    This checks:
3002
     - the node exists in the configuration
3003
     - it does not have primary or secondary instances
3004
     - it's not the master
3005

3006
    Any errors are signaled by raising errors.OpPrereqError.
3007

3008
    """
3009
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3010
    node = self.cfg.GetNodeInfo(self.op.node_name)
3011
    assert node is not None
3012

    
3013
    instance_list = self.cfg.GetInstanceList()
3014

    
3015
    masternode = self.cfg.GetMasterNode()
3016
    if node.name == masternode:
3017
      raise errors.OpPrereqError("Node is the master node,"
3018
                                 " you need to failover first.",
3019
                                 errors.ECODE_INVAL)
3020

    
3021
    for instance_name in instance_list:
3022
      instance = self.cfg.GetInstanceInfo(instance_name)
3023
      if node.name in instance.all_nodes:
3024
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3025
                                   " please remove first." % instance_name,
3026
                                   errors.ECODE_INVAL)
3027
    self.op.node_name = node.name
3028
    self.node = node
3029

    
3030
  def Exec(self, feedback_fn):
3031
    """Removes the node from the cluster.
3032

3033
    """
3034
    node = self.node
3035
    logging.info("Stopping the node daemon and removing configs from node %s",
3036
                 node.name)
3037

    
3038
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3039

    
3040
    # Promote nodes to master candidate as needed
3041
    _AdjustCandidatePool(self, exceptions=[node.name])
3042
    self.context.RemoveNode(node.name)
3043

    
3044
    # Run post hooks on the node before it's removed
3045
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3046
    try:
3047
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3048
    except:
3049
      # pylint: disable-msg=W0702
3050
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3051

    
3052
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3053
    msg = result.fail_msg
3054
    if msg:
3055
      self.LogWarning("Errors encountered on the remote node while leaving"
3056
                      " the cluster: %s", msg)
3057

    
3058
    # Remove node from our /etc/hosts
3059
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3060
      # FIXME: this should be done via an rpc call to node daemon
3061
      utils.RemoveHostFromEtcHosts(node.name)
3062
      _RedistributeAncillaryFiles(self)
3063

    
3064

    
3065
class LUQueryNodes(NoHooksLU):
3066
  """Logical unit for querying nodes.
3067

3068
  """
3069
  # pylint: disable-msg=W0142
3070
  _OP_REQP = ["output_fields", "names", "use_locking"]
3071
  REQ_BGL = False
3072

    
3073
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3074
                    "master_candidate", "offline", "drained"]
3075

    
3076
  _FIELDS_DYNAMIC = utils.FieldSet(
3077
    "dtotal", "dfree",
3078
    "mtotal", "mnode", "mfree",
3079
    "bootid",
3080
    "ctotal", "cnodes", "csockets",
3081
    )
3082

    
3083
  _FIELDS_STATIC = utils.FieldSet(*[
3084
    "pinst_cnt", "sinst_cnt",
3085
    "pinst_list", "sinst_list",
3086
    "pip", "sip", "tags",
3087
    "master",
3088
    "role"] + _SIMPLE_FIELDS
3089
    )
3090

    
3091
  def ExpandNames(self):
3092
    _CheckOutputFields(static=self._FIELDS_STATIC,
3093
                       dynamic=self._FIELDS_DYNAMIC,
3094
                       selected=self.op.output_fields)
3095

    
3096
    self.needed_locks = {}
3097
    self.share_locks[locking.LEVEL_NODE] = 1
3098

    
3099
    if self.op.names:
3100
      self.wanted = _GetWantedNodes(self, self.op.names)
3101
    else:
3102
      self.wanted = locking.ALL_SET
3103

    
3104
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3105
    self.do_locking = self.do_node_query and self.op.use_locking
3106
    if self.do_locking:
3107
      # if we don't request only static fields, we need to lock the nodes
3108
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3109

    
3110
  def CheckPrereq(self):
3111
    """Check prerequisites.
3112

3113
    """
3114
    # The validation of the node list is done in the _GetWantedNodes,
3115
    # if non empty, and if empty, there's no validation to do
3116
    pass
3117

    
3118
  def Exec(self, feedback_fn):
3119
    """Computes the list of nodes and their attributes.
3120

3121
    """
3122
    all_info = self.cfg.GetAllNodesInfo()
3123
    if self.do_locking:
3124
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3125
    elif self.wanted != locking.ALL_SET:
3126
      nodenames = self.wanted
3127
      missing = set(nodenames).difference(all_info.keys())
3128
      if missing:
3129
        raise errors.OpExecError(
3130
          "Some nodes were removed before retrieving their data: %s" % missing)
3131
    else:
3132
      nodenames = all_info.keys()
3133

    
3134
    nodenames = utils.NiceSort(nodenames)
3135
    nodelist = [all_info[name] for name in nodenames]
3136

    
3137
    # begin data gathering
3138

    
3139
    if self.do_node_query:
3140
      live_data = {}
3141
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3142
                                          self.cfg.GetHypervisorType())
3143
      for name in nodenames:
3144
        nodeinfo = node_data[name]
3145
        if not nodeinfo.fail_msg and nodeinfo.payload:
3146
          nodeinfo = nodeinfo.payload
3147
          fn = utils.TryConvert
3148
          live_data[name] = {
3149
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3150
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3151
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3152
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3153
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3154
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3155
            "bootid": nodeinfo.get('bootid', None),
3156
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3157
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3158
            }
3159
        else:
3160
          live_data[name] = {}
3161
    else:
3162
      live_data = dict.fromkeys(nodenames, {})
3163

    
3164
    node_to_primary = dict([(name, set()) for name in nodenames])
3165
    node_to_secondary = dict([(name, set()) for name in nodenames])
3166

    
3167
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3168
                             "sinst_cnt", "sinst_list"))
3169
    if inst_fields & frozenset(self.op.output_fields):
3170
      inst_data = self.cfg.GetAllInstancesInfo()
3171

    
3172
      for inst in inst_data.values():
3173
        if inst.primary_node in node_to_primary:
3174
          node_to_primary[inst.primary_node].add(inst.name)
3175
        for secnode in inst.secondary_nodes:
3176
          if secnode in node_to_secondary:
3177
            node_to_secondary[secnode].add(inst.name)
3178

    
3179
    master_node = self.cfg.GetMasterNode()
3180

    
3181
    # end data gathering
3182

    
3183
    output = []
3184
    for node in nodelist:
3185
      node_output = []
3186
      for field in self.op.output_fields:
3187
        if field in self._SIMPLE_FIELDS:
3188
          val = getattr(node, field)
3189
        elif field == "pinst_list":
3190
          val = list(node_to_primary[node.name])
3191
        elif field == "sinst_list":
3192
          val = list(node_to_secondary[node.name])
3193
        elif field == "pinst_cnt":
3194
          val = len(node_to_primary[node.name])
3195
        elif field == "sinst_cnt":
3196
          val = len(node_to_secondary[node.name])
3197
        elif field == "pip":
3198
          val = node.primary_ip
3199
        elif field == "sip":
3200
          val = node.secondary_ip
3201
        elif field == "tags":
3202
          val = list(node.GetTags())
3203
        elif field == "master":
3204
          val = node.name == master_node
3205
        elif self._FIELDS_DYNAMIC.Matches(field):
3206
          val = live_data[node.name].get(field, None)
3207
        elif field == "role":
3208
          if node.name == master_node:
3209
            val = "M"
3210
          elif node.master_candidate:
3211
            val = "C"
3212
          elif node.drained:
3213
            val = "D"
3214
          elif node.offline:
3215
            val = "O"
3216
          else:
3217
            val = "R"
3218
        else:
3219
          raise errors.ParameterError(field)
3220
        node_output.append(val)
3221
      output.append(node_output)
3222

    
3223
    return output
3224

    
3225

    
3226
class LUQueryNodeVolumes(NoHooksLU):
3227
  """Logical unit for getting volumes on node(s).
3228

3229
  """
3230
  _OP_REQP = ["nodes", "output_fields"]
3231
  REQ_BGL = False
3232
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3233
  _FIELDS_STATIC = utils.FieldSet("node")
3234

    
3235
  def ExpandNames(self):
3236
    _CheckOutputFields(static=self._FIELDS_STATIC,
3237
                       dynamic=self._FIELDS_DYNAMIC,
3238
                       selected=self.op.output_fields)
3239

    
3240
    self.needed_locks = {}
3241
    self.share_locks[locking.LEVEL_NODE] = 1
3242
    if not self.op.nodes:
3243
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3244
    else:
3245
      self.needed_locks[locking.LEVEL_NODE] = \
3246
        _GetWantedNodes(self, self.op.nodes)
3247

    
3248
  def CheckPrereq(self):
3249
    """Check prerequisites.
3250

3251
    This checks that the fields required are valid output fields.
3252

3253
    """
3254
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3255

    
3256
  def Exec(self, feedback_fn):
3257
    """Computes the list of nodes and their attributes.
3258

3259
    """
3260
    nodenames = self.nodes
3261
    volumes = self.rpc.call_node_volumes(nodenames)
3262

    
3263
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3264
             in self.cfg.GetInstanceList()]
3265

    
3266
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3267

    
3268
    output = []
3269
    for node in nodenames:
3270
      nresult = volumes[node]
3271
      if nresult.offline:
3272
        continue
3273
      msg = nresult.fail_msg
3274
      if msg:
3275
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3276
        continue
3277

    
3278
      node_vols = nresult.payload[:]
3279
      node_vols.sort(key=lambda vol: vol['dev'])
3280

    
3281
      for vol in node_vols:
3282
        node_output = []
3283
        for field in self.op.output_fields:
3284
          if field == "node":
3285
            val = node
3286
          elif field == "phys":
3287
            val = vol['dev']
3288
          elif field == "vg":
3289
            val = vol['vg']
3290
          elif field == "name":
3291
            val = vol['name']
3292
          elif field == "size":
3293
            val = int(float(vol['size']))
3294
          elif field == "instance":
3295
            for inst in ilist:
3296
              if node not in lv_by_node[inst]:
3297
                continue
3298
              if vol['name'] in lv_by_node[inst][node]:
3299
                val = inst.name
3300
                break
3301
            else:
3302
              val = '-'
3303
          else:
3304
            raise errors.ParameterError(field)
3305
          node_output.append(str(val))
3306

    
3307
        output.append(node_output)
3308

    
3309
    return output
3310

    
3311

    
3312
class LUQueryNodeStorage(NoHooksLU):
3313
  """Logical unit for getting information on storage units on node(s).
3314

3315
  """
3316
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3317
  REQ_BGL = False
3318
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3319

    
3320
  def CheckArguments(self):
3321
    _CheckStorageType(self.op.storage_type)
3322

    
3323
    _CheckOutputFields(static=self._FIELDS_STATIC,
3324
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3325
                       selected=self.op.output_fields)
3326

    
3327
  def ExpandNames(self):
3328
    self.needed_locks = {}
3329
    self.share_locks[locking.LEVEL_NODE] = 1
3330

    
3331
    if self.op.nodes:
3332
      self.needed_locks[locking.LEVEL_NODE] = \
3333
        _GetWantedNodes(self, self.op.nodes)
3334
    else:
3335
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3336

    
3337
  def CheckPrereq(self):
3338
    """Check prerequisites.
3339

3340
    This checks that the fields required are valid output fields.
3341

3342
    """
3343
    self.op.name = getattr(self.op, "name", None)
3344

    
3345
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3346

    
3347
  def Exec(self, feedback_fn):
3348
    """Computes the list of nodes and their attributes.
3349

3350
    """
3351
    # Always get name to sort by
3352
    if constants.SF_NAME in self.op.output_fields:
3353
      fields = self.op.output_fields[:]
3354
    else:
3355
      fields = [constants.SF_NAME] + self.op.output_fields
3356

    
3357
    # Never ask for node or type as it's only known to the LU
3358
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3359
      while extra in fields:
3360
        fields.remove(extra)
3361

    
3362
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3363
    name_idx = field_idx[constants.SF_NAME]
3364

    
3365
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3366
    data = self.rpc.call_storage_list(self.nodes,
3367
                                      self.op.storage_type, st_args,
3368
                                      self.op.name, fields)
3369

    
3370
    result = []
3371

    
3372
    for node in utils.NiceSort(self.nodes):
3373
      nresult = data[node]
3374
      if nresult.offline:
3375
        continue
3376

    
3377
      msg = nresult.fail_msg
3378
      if msg:
3379
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3380
        continue
3381

    
3382
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3383

    
3384
      for name in utils.NiceSort(rows.keys()):
3385
        row = rows[name]
3386

    
3387
        out = []
3388

    
3389
        for field in self.op.output_fields:
3390
          if field == constants.SF_NODE:
3391
            val = node
3392
          elif field == constants.SF_TYPE:
3393
            val = self.op.storage_type
3394
          elif field in field_idx:
3395
            val = row[field_idx[field]]
3396
          else:
3397
            raise errors.ParameterError(field)
3398

    
3399
          out.append(val)
3400

    
3401
        result.append(out)
3402

    
3403
    return result
3404

    
3405

    
3406
class LUModifyNodeStorage(NoHooksLU):
3407
  """Logical unit for modifying a storage volume on a node.
3408

3409
  """
3410
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3411
  REQ_BGL = False
3412

    
3413
  def CheckArguments(self):
3414
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3415

    
3416
    _CheckStorageType(self.op.storage_type)
3417

    
3418
  def ExpandNames(self):
3419
    self.needed_locks = {
3420
      locking.LEVEL_NODE: self.op.node_name,
3421
      }
3422

    
3423
  def CheckPrereq(self):
3424
    """Check prerequisites.
3425

3426
    """
3427
    storage_type = self.op.storage_type
3428

    
3429
    try:
3430
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3431
    except KeyError:
3432
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3433
                                 " modified" % storage_type,
3434
                                 errors.ECODE_INVAL)
3435

    
3436
    diff = set(self.op.changes.keys()) - modifiable
3437
    if diff:
3438
      raise errors.OpPrereqError("The following fields can not be modified for"
3439
                                 " storage units of type '%s': %r" %
3440
                                 (storage_type, list(diff)),
3441
                                 errors.ECODE_INVAL)
3442

    
3443
  def Exec(self, feedback_fn):
3444
    """Computes the list of nodes and their attributes.
3445

3446
    """
3447
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3448
    result = self.rpc.call_storage_modify(self.op.node_name,
3449
                                          self.op.storage_type, st_args,
3450
                                          self.op.name, self.op.changes)
3451
    result.Raise("Failed to modify storage unit '%s' on %s" %
3452
                 (self.op.name, self.op.node_name))
3453

    
3454

    
3455
class LUAddNode(LogicalUnit):
3456
  """Logical unit for adding node to the cluster.
3457

3458
  """
3459
  HPATH = "node-add"
3460
  HTYPE = constants.HTYPE_NODE
3461
  _OP_REQP = ["node_name"]
3462

    
3463
  def CheckArguments(self):
3464
    # validate/normalize the node name
3465
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3466

    
3467
  def BuildHooksEnv(self):
3468
    """Build hooks env.
3469

3470
    This will run on all nodes before, and on all nodes + the new node after.
3471

3472
    """
3473
    env = {
3474
      "OP_TARGET": self.op.node_name,
3475
      "NODE_NAME": self.op.node_name,
3476
      "NODE_PIP": self.op.primary_ip,
3477
      "NODE_SIP": self.op.secondary_ip,
3478
      }
3479
    nodes_0 = self.cfg.GetNodeList()
3480
    nodes_1 = nodes_0 + [self.op.node_name, ]
3481
    return env, nodes_0, nodes_1
3482

    
3483
  def CheckPrereq(self):
3484
    """Check prerequisites.
3485

3486
    This checks:
3487
     - the new node is not already in the config
3488
     - it is resolvable
3489
     - its parameters (single/dual homed) matches the cluster
3490

3491
    Any errors are signaled by raising errors.OpPrereqError.
3492

3493
    """
3494
    node_name = self.op.node_name
3495
    cfg = self.cfg
3496

    
3497
    dns_data = utils.GetHostInfo(node_name)
3498

    
3499
    node = dns_data.name
3500
    primary_ip = self.op.primary_ip = dns_data.ip
3501
    secondary_ip = getattr(self.op, "secondary_ip", None)
3502
    if secondary_ip is None:
3503
      secondary_ip = primary_ip
3504
    if not utils.IsValidIP(secondary_ip):
3505
      raise errors.OpPrereqError("Invalid secondary IP given",
3506
                                 errors.ECODE_INVAL)
3507
    self.op.secondary_ip = secondary_ip
3508

    
3509
    node_list = cfg.GetNodeList()
3510
    if not self.op.readd and node in node_list:
3511
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3512
                                 node, errors.ECODE_EXISTS)
3513
    elif self.op.readd and node not in node_list:
3514
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3515
                                 errors.ECODE_NOENT)
3516

    
3517
    self.changed_primary_ip = False
3518

    
3519
    for existing_node_name in node_list:
3520
      existing_node = cfg.GetNodeInfo(existing_node_name)
3521

    
3522
      if self.op.readd and node == existing_node_name:
3523
        if existing_node.secondary_ip != secondary_ip:
3524
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3525
                                     " address configuration as before",
3526
                                     errors.ECODE_INVAL)
3527
        if existing_node.primary_ip != primary_ip:
3528
          self.changed_primary_ip = True
3529

    
3530
        continue
3531

    
3532
      if (existing_node.primary_ip == primary_ip or
3533
          existing_node.secondary_ip == primary_ip or
3534
          existing_node.primary_ip == secondary_ip or
3535
          existing_node.secondary_ip == secondary_ip):
3536
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3537
                                   " existing node %s" % existing_node.name,
3538
                                   errors.ECODE_NOTUNIQUE)
3539

    
3540
    # check that the type of the node (single versus dual homed) is the
3541
    # same as for the master
3542
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3543
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3544
    newbie_singlehomed = secondary_ip == primary_ip
3545
    if master_singlehomed != newbie_singlehomed:
3546
      if master_singlehomed:
3547
        raise errors.OpPrereqError("The master has no private ip but the"
3548
                                   " new node has one",
3549
                                   errors.ECODE_INVAL)
3550
      else:
3551
        raise errors.OpPrereqError("The master has a private ip but the"
3552
                                   " new node doesn't have one",
3553
                                   errors.ECODE_INVAL)
3554

    
3555
    # checks reachability
3556
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3557
      raise errors.OpPrereqError("Node not reachable by ping",
3558
                                 errors.ECODE_ENVIRON)
3559

    
3560
    if not newbie_singlehomed:
3561
      # check reachability from my secondary ip to newbie's secondary ip
3562
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3563
                           source=myself.secondary_ip):
3564
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3565
                                   " based ping to noded port",
3566
                                   errors.ECODE_ENVIRON)
3567

    
3568
    if self.op.readd:
3569
      exceptions = [node]
3570
    else:
3571
      exceptions = []
3572

    
3573
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3574

    
3575
    if self.op.readd:
3576
      self.new_node = self.cfg.GetNodeInfo(node)
3577
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3578
    else:
3579
      self.new_node = objects.Node(name=node,
3580
                                   primary_ip=primary_ip,
3581
                                   secondary_ip=secondary_ip,
3582
                                   master_candidate=self.master_candidate,
3583
                                   offline=False, drained=False)
3584

    
3585
  def Exec(self, feedback_fn):
3586
    """Adds the new node to the cluster.
3587

3588
    """
3589
    new_node = self.new_node
3590
    node = new_node.name
3591

    
3592
    # for re-adds, reset the offline/drained/master-candidate flags;
3593
    # we need to reset here, otherwise offline would prevent RPC calls
3594
    # later in the procedure; this also means that if the re-add
3595
    # fails, we are left with a non-offlined, broken node
3596
    if self.op.readd:
3597
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3598
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3599
      # if we demote the node, we do cleanup later in the procedure
3600
      new_node.master_candidate = self.master_candidate
3601
      if self.changed_primary_ip:
3602
        new_node.primary_ip = self.op.primary_ip
3603

    
3604
    # notify the user about any possible mc promotion
3605
    if new_node.master_candidate:
3606
      self.LogInfo("Node will be a master candidate")
3607

    
3608
    # check connectivity
3609
    result = self.rpc.call_version([node])[node]
3610
    result.Raise("Can't get version information from node %s" % node)
3611
    if constants.PROTOCOL_VERSION == result.payload:
3612
      logging.info("Communication to node %s fine, sw version %s match",
3613
                   node, result.payload)
3614
    else:
3615
      raise errors.OpExecError("Version mismatch master version %s,"
3616
                               " node version %s" %
3617
                               (constants.PROTOCOL_VERSION, result.payload))
3618

    
3619
    # setup ssh on node
3620
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3621
      logging.info("Copy ssh key to node %s", node)
3622
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3623
      keyarray = []
3624
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3625
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3626
                  priv_key, pub_key]
3627

    
3628
      for i in keyfiles:
3629
        keyarray.append(utils.ReadFile(i))
3630

    
3631
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3632
                                      keyarray[2], keyarray[3], keyarray[4],
3633
                                      keyarray[5])
3634
      result.Raise("Cannot transfer ssh keys to the new node")
3635

    
3636
    # Add node to our /etc/hosts, and add key to known_hosts
3637
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3638
      # FIXME: this should be done via an rpc call to node daemon
3639
      utils.AddHostToEtcHosts(new_node.name)
3640

    
3641
    if new_node.secondary_ip != new_node.primary_ip:
3642
      result = self.rpc.call_node_has_ip_address(new_node.name,
3643
                                                 new_node.secondary_ip)
3644
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3645
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3646
      if not result.payload:
3647
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3648
                                 " you gave (%s). Please fix and re-run this"
3649
                                 " command." % new_node.secondary_ip)
3650

    
3651
    node_verify_list = [self.cfg.GetMasterNode()]
3652
    node_verify_param = {
3653
      constants.NV_NODELIST: [node],
3654
      # TODO: do a node-net-test as well?
3655
    }
3656

    
3657
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3658
                                       self.cfg.GetClusterName())
3659
    for verifier in node_verify_list:
3660
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3661
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3662
      if nl_payload:
3663
        for failed in nl_payload:
3664
          feedback_fn("ssh/hostname verification failed"
3665
                      " (checking from %s): %s" %
3666
                      (verifier, nl_payload[failed]))
3667
        raise errors.OpExecError("ssh/hostname verification failed.")
3668

    
3669
    if self.op.readd:
3670
      _RedistributeAncillaryFiles(self)
3671
      self.context.ReaddNode(new_node)
3672
      # make sure we redistribute the config
3673
      self.cfg.Update(new_node, feedback_fn)
3674
      # and make sure the new node will not have old files around
3675
      if not new_node.master_candidate:
3676
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3677
        msg = result.fail_msg
3678
        if msg:
3679
          self.LogWarning("Node failed to demote itself from master"
3680
                          " candidate status: %s" % msg)
3681
    else:
3682
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3683
      self.context.AddNode(new_node, self.proc.GetECId())
3684

    
3685

    
3686
class LUSetNodeParams(LogicalUnit):
3687
  """Modifies the parameters of a node.
3688

3689
  """
3690
  HPATH = "node-modify"
3691
  HTYPE = constants.HTYPE_NODE
3692
  _OP_REQP = ["node_name"]
3693
  REQ_BGL = False
3694

    
3695
  def CheckArguments(self):
3696
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3697
    _CheckBooleanOpField(self.op, 'master_candidate')
3698
    _CheckBooleanOpField(self.op, 'offline')
3699
    _CheckBooleanOpField(self.op, 'drained')
3700
    _CheckBooleanOpField(self.op, 'auto_promote')
3701
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3702
    if all_mods.count(None) == 3:
3703
      raise errors.OpPrereqError("Please pass at least one modification",
3704
                                 errors.ECODE_INVAL)
3705
    if all_mods.count(True) > 1:
3706
      raise errors.OpPrereqError("Can't set the node into more than one"
3707
                                 " state at the same time",
3708
                                 errors.ECODE_INVAL)
3709

    
3710
    # Boolean value that tells us whether we're offlining or draining the node
3711
    self.offline_or_drain = (self.op.offline == True or
3712
                             self.op.drained == True)
3713
    self.deoffline_or_drain = (self.op.offline == False or
3714
                               self.op.drained == False)
3715
    self.might_demote = (self.op.master_candidate == False or
3716
                         self.offline_or_drain)
3717

    
3718
    self.lock_all = self.op.auto_promote and self.might_demote
3719

    
3720

    
3721
  def ExpandNames(self):
3722
    if self.lock_all:
3723
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3724
    else:
3725
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3726

    
3727
  def BuildHooksEnv(self):
3728
    """Build hooks env.
3729

3730
    This runs on the master node.
3731

3732
    """
3733
    env = {
3734
      "OP_TARGET": self.op.node_name,
3735
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3736
      "OFFLINE": str(self.op.offline),
3737
      "DRAINED": str(self.op.drained),
3738
      }
3739
    nl = [self.cfg.GetMasterNode(),
3740
          self.op.node_name]
3741
    return env, nl, nl
3742

    
3743
  def CheckPrereq(self):
3744
    """Check prerequisites.
3745

3746
    This only checks the instance list against the existing names.
3747

3748
    """
3749
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3750

    
3751
    if (self.op.master_candidate is not None or
3752
        self.op.drained is not None or
3753
        self.op.offline is not None):
3754
      # we can't change the master's node flags
3755
      if self.op.node_name == self.cfg.GetMasterNode():
3756
        raise errors.OpPrereqError("The master role can be changed"
3757
                                   " only via masterfailover",
3758
                                   errors.ECODE_INVAL)
3759

    
3760

    
3761
    if node.master_candidate and self.might_demote and not self.lock_all:
3762
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3763
      # check if after removing the current node, we're missing master
3764
      # candidates
3765
      (mc_remaining, mc_should, _) = \
3766
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3767
      if mc_remaining < mc_should:
3768
        raise errors.OpPrereqError("Not enough master candidates, please"
3769
                                   " pass auto_promote to allow promotion",
3770
                                   errors.ECODE_INVAL)
3771

    
3772
    if (self.op.master_candidate == True and
3773
        ((node.offline and not self.op.offline == False) or
3774
         (node.drained and not self.op.drained == False))):
3775
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3776
                                 " to master_candidate" % node.name,
3777
                                 errors.ECODE_INVAL)
3778

    
3779
    # If we're being deofflined/drained, we'll MC ourself if needed
3780
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3781
        self.op.master_candidate == True and not node.master_candidate):
3782
      self.op.master_candidate = _DecideSelfPromotion(self)
3783
      if self.op.master_candidate:
3784
        self.LogInfo("Autopromoting node to master candidate")
3785

    
3786
    return
3787

    
3788
  def Exec(self, feedback_fn):
3789
    """Modifies a node.
3790

3791
    """
3792
    node = self.node
3793

    
3794
    result = []
3795
    changed_mc = False
3796

    
3797
    if self.op.offline is not None:
3798
      node.offline = self.op.offline
3799
      result.append(("offline", str(self.op.offline)))
3800
      if self.op.offline == True:
3801
        if node.master_candidate:
3802
          node.master_candidate = False
3803
          changed_mc = True
3804
          result.append(("master_candidate", "auto-demotion due to offline"))
3805
        if node.drained:
3806
          node.drained = False
3807
          result.append(("drained", "clear drained status due to offline"))
3808

    
3809
    if self.op.master_candidate is not None:
3810
      node.master_candidate = self.op.master_candidate
3811
      changed_mc = True
3812
      result.append(("master_candidate", str(self.op.master_candidate)))
3813
      if self.op.master_candidate == False:
3814
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3815
        msg = rrc.fail_msg
3816
        if msg:
3817
          self.LogWarning("Node failed to demote itself: %s" % msg)
3818

    
3819
    if self.op.drained is not None:
3820
      node.drained = self.op.drained
3821
      result.append(("drained", str(self.op.drained)))
3822
      if self.op.drained == True:
3823
        if node.master_candidate:
3824
          node.master_candidate = False
3825
          changed_mc = True
3826
          result.append(("master_candidate", "auto-demotion due to drain"))
3827
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3828
          msg = rrc.fail_msg
3829
          if msg:
3830
            self.LogWarning("Node failed to demote itself: %s" % msg)
3831
        if node.offline:
3832
          node.offline = False
3833
          result.append(("offline", "clear offline status due to drain"))
3834

    
3835
    # we locked all nodes, we adjust the CP before updating this node
3836
    if self.lock_all:
3837
      _AdjustCandidatePool(self, [node.name])
3838

    
3839
    # this will trigger configuration file update, if needed
3840
    self.cfg.Update(node, feedback_fn)
3841

    
3842
    # this will trigger job queue propagation or cleanup
3843
    if changed_mc:
3844
      self.context.ReaddNode(node)
3845

    
3846
    return result
3847

    
3848

    
3849
class LUPowercycleNode(NoHooksLU):
3850
  """Powercycles a node.
3851

3852
  """
3853
  _OP_REQP = ["node_name", "force"]
3854
  REQ_BGL = False
3855

    
3856
  def CheckArguments(self):
3857
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3858
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3859
      raise errors.OpPrereqError("The node is the master and the force"
3860
                                 " parameter was not set",
3861
                                 errors.ECODE_INVAL)
3862

    
3863
  def ExpandNames(self):
3864
    """Locking for PowercycleNode.
3865

3866
    This is a last-resort option and shouldn't block on other
3867
    jobs. Therefore, we grab no locks.
3868

3869
    """
3870
    self.needed_locks = {}
3871

    
3872
  def CheckPrereq(self):
3873
    """Check prerequisites.
3874

3875
    This LU has no prereqs.
3876

3877
    """
3878
    pass
3879

    
3880
  def Exec(self, feedback_fn):
3881
    """Reboots a node.
3882

3883
    """
3884
    result = self.rpc.call_node_powercycle(self.op.node_name,
3885
                                           self.cfg.GetHypervisorType())
3886
    result.Raise("Failed to schedule the reboot")
3887
    return result.payload
3888

    
3889

    
3890
class LUQueryClusterInfo(NoHooksLU):
3891
  """Query cluster configuration.
3892

3893
  """
3894
  _OP_REQP = []
3895
  REQ_BGL = False
3896

    
3897
  def ExpandNames(self):
3898
    self.needed_locks = {}
3899

    
3900
  def CheckPrereq(self):
3901
    """No prerequsites needed for this LU.
3902

3903
    """
3904
    pass
3905

    
3906
  def Exec(self, feedback_fn):
3907
    """Return cluster config.
3908

3909
    """
3910
    cluster = self.cfg.GetClusterInfo()
3911
    os_hvp = {}
3912

    
3913
    # Filter just for enabled hypervisors
3914
    for os_name, hv_dict in cluster.os_hvp.items():
3915
      os_hvp[os_name] = {}
3916
      for hv_name, hv_params in hv_dict.items():
3917
        if hv_name in cluster.enabled_hypervisors:
3918
          os_hvp[os_name][hv_name] = hv_params
3919

    
3920
    result = {
3921
      "software_version": constants.RELEASE_VERSION,
3922
      "protocol_version": constants.PROTOCOL_VERSION,
3923
      "config_version": constants.CONFIG_VERSION,
3924
      "os_api_version": max(constants.OS_API_VERSIONS),
3925
      "export_version": constants.EXPORT_VERSION,
3926
      "architecture": (platform.architecture()[0], platform.machine()),
3927
      "name": cluster.cluster_name,
3928
      "master": cluster.master_node,
3929
      "default_hypervisor": cluster.enabled_hypervisors[0],
3930
      "enabled_hypervisors": cluster.enabled_hypervisors,
3931
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3932
                        for hypervisor_name in cluster.enabled_hypervisors]),
3933
      "os_hvp": os_hvp,
3934
      "beparams": cluster.beparams,
3935
      "osparams": cluster.osparams,
3936
      "nicparams": cluster.nicparams,
3937
      "candidate_pool_size": cluster.candidate_pool_size,
3938
      "master_netdev": cluster.master_netdev,
3939
      "volume_group_name": cluster.volume_group_name,
3940
      "file_storage_dir": cluster.file_storage_dir,
3941
      "maintain_node_health": cluster.maintain_node_health,
3942
      "ctime": cluster.ctime,
3943
      "mtime": cluster.mtime,
3944
      "uuid": cluster.uuid,
3945
      "tags": list(cluster.GetTags()),
3946
      "uid_pool": cluster.uid_pool,
3947
      }
3948

    
3949
    return result
3950

    
3951

    
3952
class LUQueryConfigValues(NoHooksLU):
3953
  """Return configuration values.
3954

3955
  """
3956
  _OP_REQP = []
3957
  REQ_BGL = False
3958
  _FIELDS_DYNAMIC = utils.FieldSet()
3959
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3960
                                  "watcher_pause")
3961

    
3962
  def ExpandNames(self):
3963
    self.needed_locks = {}
3964

    
3965
    _CheckOutputFields(static=self._FIELDS_STATIC,
3966
                       dynamic=self._FIELDS_DYNAMIC,
3967
                       selected=self.op.output_fields)
3968

    
3969
  def CheckPrereq(self):
3970
    """No prerequisites.
3971

3972
    """
3973
    pass
3974

    
3975
  def Exec(self, feedback_fn):
3976
    """Dump a representation of the cluster config to the standard output.
3977

3978
    """
3979
    values = []
3980
    for field in self.op.output_fields:
3981
      if field == "cluster_name":
3982
        entry = self.cfg.GetClusterName()
3983
      elif field == "master_node":
3984
        entry = self.cfg.GetMasterNode()
3985
      elif field == "drain_flag":
3986
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3987
      elif field == "watcher_pause":
3988
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3989
      else:
3990
        raise errors.ParameterError(field)
3991
      values.append(entry)
3992
    return values
3993

    
3994

    
3995
class LUActivateInstanceDisks(NoHooksLU):
3996
  """Bring up an instance's disks.
3997

3998
  """
3999
  _OP_REQP = ["instance_name"]
4000
  REQ_BGL = False
4001

    
4002
  def ExpandNames(self):
4003
    self._ExpandAndLockInstance()
4004
    self.needed_locks[locking.LEVEL_NODE] = []
4005
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4006

    
4007
  def DeclareLocks(self, level):
4008
    if level == locking.LEVEL_NODE:
4009
      self._LockInstancesNodes()
4010

    
4011
  def CheckPrereq(self):
4012
    """Check prerequisites.
4013

4014
    This checks that the instance is in the cluster.
4015

4016
    """
4017
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4018
    assert self.instance is not None, \
4019
      "Cannot retrieve locked instance %s" % self.op.instance_name
4020
    _CheckNodeOnline(self, self.instance.primary_node)
4021
    if not hasattr(self.op, "ignore_size"):
4022
      self.op.ignore_size = False
4023

    
4024
  def Exec(self, feedback_fn):
4025
    """Activate the disks.
4026

4027
    """
4028
    disks_ok, disks_info = \
4029
              _AssembleInstanceDisks(self, self.instance,
4030
                                     ignore_size=self.op.ignore_size)
4031
    if not disks_ok:
4032
      raise errors.OpExecError("Cannot activate block devices")
4033

    
4034
    return disks_info
4035

    
4036

    
4037
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4038
                           ignore_size=False):
4039
  """Prepare the block devices for an instance.
4040

4041
  This sets up the block devices on all nodes.
4042

4043
  @type lu: L{LogicalUnit}
4044
  @param lu: the logical unit on whose behalf we execute
4045
  @type instance: L{objects.Instance}
4046
  @param instance: the instance for whose disks we assemble
4047
  @type disks: list of L{objects.Disk} or None
4048
  @param disks: which disks to assemble (or all, if None)
4049
  @type ignore_secondaries: boolean
4050
  @param ignore_secondaries: if true, errors on secondary nodes
4051
      won't result in an error return from the function
4052
  @type ignore_size: boolean
4053
  @param ignore_size: if true, the current known size of the disk
4054
      will not be used during the disk activation, useful for cases
4055
      when the size is wrong
4056
  @return: False if the operation failed, otherwise a list of
4057
      (host, instance_visible_name, node_visible_name)
4058
      with the mapping from node devices to instance devices
4059

4060
  """
4061
  device_info = []
4062
  disks_ok = True
4063
  iname = instance.name
4064
  disks = _ExpandCheckDisks(instance, disks)
4065

    
4066
  # With the two passes mechanism we try to reduce the window of
4067
  # opportunity for the race condition of switching DRBD to primary
4068
  # before handshaking occured, but we do not eliminate it
4069

    
4070
  # The proper fix would be to wait (with some limits) until the
4071
  # connection has been made and drbd transitions from WFConnection
4072
  # into any other network-connected state (Connected, SyncTarget,
4073
  # SyncSource, etc.)
4074

    
4075
  # 1st pass, assemble on all nodes in secondary mode
4076
  for inst_disk in disks:
4077
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4078
      if ignore_size:
4079
        node_disk = node_disk.Copy()
4080
        node_disk.UnsetSize()
4081
      lu.cfg.SetDiskID(node_disk, node)
4082
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4083
      msg = result.fail_msg
4084
      if msg:
4085
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4086
                           " (is_primary=False, pass=1): %s",
4087
                           inst_disk.iv_name, node, msg)
4088
        if not ignore_secondaries:
4089
          disks_ok = False
4090

    
4091
  # FIXME: race condition on drbd migration to primary
4092

    
4093
  # 2nd pass, do only the primary node
4094
  for inst_disk in disks:
4095
    dev_path = None
4096

    
4097
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4098
      if node != instance.primary_node:
4099
        continue
4100
      if ignore_size:
4101
        node_disk = node_disk.Copy()
4102
        node_disk.UnsetSize()
4103
      lu.cfg.SetDiskID(node_disk, node)
4104
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4105
      msg = result.fail_msg
4106
      if msg:
4107
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4108
                           " (is_primary=True, pass=2): %s",
4109
                           inst_disk.iv_name, node, msg)
4110
        disks_ok = False
4111
      else:
4112
        dev_path = result.payload
4113

    
4114
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4115

    
4116
  # leave the disks configured for the primary node
4117
  # this is a workaround that would be fixed better by
4118
  # improving the logical/physical id handling
4119
  for disk in disks:
4120
    lu.cfg.SetDiskID(disk, instance.primary_node)
4121

    
4122
  return disks_ok, device_info
4123

    
4124

    
4125
def _StartInstanceDisks(lu, instance, force):
4126
  """Start the disks of an instance.
4127

4128
  """
4129
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4130
                                           ignore_secondaries=force)
4131
  if not disks_ok:
4132
    _ShutdownInstanceDisks(lu, instance)
4133
    if force is not None and not force:
4134
      lu.proc.LogWarning("", hint="If the message above refers to a"
4135
                         " secondary node,"
4136
                         " you can retry the operation using '--force'.")
4137
    raise errors.OpExecError("Disk consistency error")
4138

    
4139

    
4140
class LUDeactivateInstanceDisks(NoHooksLU):
4141
  """Shutdown an instance's disks.
4142

4143
  """
4144
  _OP_REQP = ["instance_name"]
4145
  REQ_BGL = False
4146

    
4147
  def ExpandNames(self):
4148
    self._ExpandAndLockInstance()
4149
    self.needed_locks[locking.LEVEL_NODE] = []
4150
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4151

    
4152
  def DeclareLocks(self, level):
4153
    if level == locking.LEVEL_NODE:
4154
      self._LockInstancesNodes()
4155

    
4156
  def CheckPrereq(self):
4157
    """Check prerequisites.
4158

4159
    This checks that the instance is in the cluster.
4160

4161
    """
4162
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4163
    assert self.instance is not None, \
4164
      "Cannot retrieve locked instance %s" % self.op.instance_name
4165

    
4166
  def Exec(self, feedback_fn):
4167
    """Deactivate the disks
4168

4169
    """
4170
    instance = self.instance
4171
    _SafeShutdownInstanceDisks(self, instance)
4172

    
4173

    
4174
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4175
  """Shutdown block devices of an instance.
4176

4177
  This function checks if an instance is running, before calling
4178
  _ShutdownInstanceDisks.
4179

4180
  """
4181
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4182
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4183

    
4184

    
4185
def _ExpandCheckDisks(instance, disks):
4186
  """Return the instance disks selected by the disks list
4187

4188
  @type disks: list of L{objects.Disk} or None
4189
  @param disks: selected disks
4190
  @rtype: list of L{objects.Disk}
4191
  @return: selected instance disks to act on
4192

4193
  """
4194
  if disks is None:
4195
    return instance.disks
4196
  else:
4197
    if not set(disks).issubset(instance.disks):
4198
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4199
                                   " target instance")
4200
    return disks
4201

    
4202

    
4203
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4204
  """Shutdown block devices of an instance.
4205

4206
  This does the shutdown on all nodes of the instance.
4207

4208
  If the ignore_primary is false, errors on the primary node are
4209
  ignored.
4210

4211
  """
4212
  all_result = True
4213
  disks = _ExpandCheckDisks(instance, disks)
4214

    
4215
  for disk in disks:
4216
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4217
      lu.cfg.SetDiskID(top_disk, node)
4218
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4219
      msg = result.fail_msg
4220
      if msg:
4221
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4222
                      disk.iv_name, node, msg)
4223
        if not ignore_primary or node != instance.primary_node:
4224
          all_result = False
4225
  return all_result
4226

    
4227

    
4228
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4229
  """Checks if a node has enough free memory.
4230

4231
  This function check if a given node has the needed amount of free
4232
  memory. In case the node has less memory or we cannot get the
4233
  information from the node, this function raise an OpPrereqError
4234
  exception.
4235

4236
  @type lu: C{LogicalUnit}
4237
  @param lu: a logical unit from which we get configuration data
4238
  @type node: C{str}
4239
  @param node: the node to check
4240
  @type reason: C{str}
4241
  @param reason: string to use in the error message
4242
  @type requested: C{int}
4243
  @param requested: the amount of memory in MiB to check for
4244
  @type hypervisor_name: C{str}
4245
  @param hypervisor_name: the hypervisor to ask for memory stats
4246
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4247
      we cannot check the node
4248

4249
  """
4250
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4251
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4252
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4253
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4254
  if not isinstance(free_mem, int):
4255
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4256
                               " was '%s'" % (node, free_mem),
4257
                               errors.ECODE_ENVIRON)
4258
  if requested > free_mem:
4259
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4260
                               " needed %s MiB, available %s MiB" %
4261
                               (node, reason, requested, free_mem),
4262
                               errors.ECODE_NORES)
4263

    
4264

    
4265
def _CheckNodesFreeDisk(lu, nodenames, requested):
4266
  """Checks if nodes have enough free disk space in the default VG.
4267

4268
  This function check if all given nodes have the needed amount of
4269
  free disk. In case any node has less disk or we cannot get the
4270
  information from the node, this function raise an OpPrereqError
4271
  exception.
4272

4273
  @type lu: C{LogicalUnit}
4274
  @param lu: a logical unit from which we get configuration data
4275
  @type nodenames: C{list}
4276
  @param nodenames: the list of node names to check
4277
  @type requested: C{int}
4278
  @param requested: the amount of disk in MiB to check for
4279
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4280
      we cannot check the node
4281

4282
  """
4283
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4284
                                   lu.cfg.GetHypervisorType())
4285
  for node in nodenames:
4286
    info = nodeinfo[node]
4287
    info.Raise("Cannot get current information from node %s" % node,
4288
               prereq=True, ecode=errors.ECODE_ENVIRON)
4289
    vg_free = info.payload.get("vg_free", None)
4290
    if not isinstance(vg_free, int):
4291
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4292
                                 " result was '%s'" % (node, vg_free),
4293
                                 errors.ECODE_ENVIRON)
4294
    if requested > vg_free:
4295
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4296
                                 " required %d MiB, available %d MiB" %
4297
                                 (node, requested, vg_free),
4298
                                 errors.ECODE_NORES)
4299

    
4300

    
4301
class LUStartupInstance(LogicalUnit):
4302
  """Starts an instance.
4303

4304
  """
4305
  HPATH = "instance-start"
4306
  HTYPE = constants.HTYPE_INSTANCE
4307
  _OP_REQP = ["instance_name", "force"]
4308
  REQ_BGL = False
4309

    
4310
  def ExpandNames(self):
4311
    self._ExpandAndLockInstance()
4312

    
4313
  def BuildHooksEnv(self):
4314
    """Build hooks env.
4315

4316
    This runs on master, primary and secondary nodes of the instance.
4317

4318
    """
4319
    env = {
4320
      "FORCE": self.op.force,
4321
      }
4322
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4323
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4324
    return env, nl, nl
4325

    
4326
  def CheckPrereq(self):
4327
    """Check prerequisites.
4328

4329
    This checks that the instance is in the cluster.
4330

4331
    """
4332
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4333
    assert self.instance is not None, \
4334
      "Cannot retrieve locked instance %s" % self.op.instance_name
4335

    
4336
    # extra beparams
4337
    self.beparams = getattr(self.op, "beparams", {})
4338
    if self.beparams:
4339
      if not isinstance(self.beparams, dict):
4340
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4341
                                   " dict" % (type(self.beparams), ),
4342
                                   errors.ECODE_INVAL)
4343
      # fill the beparams dict
4344
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4345
      self.op.beparams = self.beparams
4346

    
4347
    # extra hvparams
4348
    self.hvparams = getattr(self.op, "hvparams", {})
4349
    if self.hvparams:
4350
      if not isinstance(self.hvparams, dict):
4351
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4352
                                   " dict" % (type(self.hvparams), ),
4353
                                   errors.ECODE_INVAL)
4354

    
4355
      # check hypervisor parameter syntax (locally)
4356
      cluster = self.cfg.GetClusterInfo()
4357
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4358
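      # the effective hypervisor parameters are the cluster/instance defaults
      # (FillHV) with the one-off overrides from this opcode applied on top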
      filled_hvp = cluster.FillHV(instance)
4359
      filled_hvp.update(self.hvparams)
4360
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4361
      hv_type.CheckParameterSyntax(filled_hvp)
4362
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4363
      self.op.hvparams = self.hvparams
4364

    
4365
    _CheckNodeOnline(self, instance.primary_node)
4366

    
4367
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4368
    # check bridges existence
4369
    _CheckInstanceBridgesExist(self, instance)
4370

    
4371
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4372
                                              instance.name,
4373
                                              instance.hypervisor)
4374
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4375
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4376
    if not remote_info.payload: # not running already
4377
      _CheckNodeFreeMemory(self, instance.primary_node,
4378
                           "starting instance %s" % instance.name,
4379
                           bep[constants.BE_MEMORY], instance.hypervisor)
4380

    
4381
  def Exec(self, feedback_fn):
4382
    """Start the instance.
4383

4384
    """
4385
    instance = self.instance
4386
    force = self.op.force
4387

    
4388
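    # record the new administrative state in the configuration before issuing
    # the start RPC; if the start fails below, the disks are shut down again
    # and the error is propagated to the caller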
    self.cfg.MarkInstanceUp(instance.name)
4389

    
4390
    node_current = instance.primary_node
4391

    
4392
    _StartInstanceDisks(self, instance, force)
4393

    
4394
    result = self.rpc.call_instance_start(node_current, instance,
4395
                                          self.hvparams, self.beparams)
4396
    msg = result.fail_msg
4397
    if msg:
4398
      _ShutdownInstanceDisks(self, instance)
4399
      raise errors.OpExecError("Could not start instance: %s" % msg)
4400

    
4401

    
4402
class LURebootInstance(LogicalUnit):
4403
  """Reboot an instance.
4404

4405
  """
4406
  HPATH = "instance-reboot"
4407
  HTYPE = constants.HTYPE_INSTANCE
4408
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4409
  REQ_BGL = False
4410

    
4411
  def CheckArguments(self):
4412
    """Check the arguments.
4413

4414
    """
4415
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4416
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4417

    
4418
  def ExpandNames(self):
4419
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4420
                                   constants.INSTANCE_REBOOT_HARD,
4421
                                   constants.INSTANCE_REBOOT_FULL]:
4422
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4423
                                  (constants.INSTANCE_REBOOT_SOFT,
4424
                                   constants.INSTANCE_REBOOT_HARD,
4425
                                   constants.INSTANCE_REBOOT_FULL))
4426
    self._ExpandAndLockInstance()
4427

    
4428
  def BuildHooksEnv(self):
4429
    """Build hooks env.
4430

4431
    This runs on master, primary and secondary nodes of the instance.
4432

4433
    """
4434
    env = {
4435
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4436
      "REBOOT_TYPE": self.op.reboot_type,
4437
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4438
      }
4439
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4440
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4441
    return env, nl, nl
4442

    
4443
  def CheckPrereq(self):
4444
    """Check prerequisites.
4445

4446
    This checks that the instance is in the cluster.
4447

4448
    """
4449
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4450
    assert self.instance is not None, \
4451
      "Cannot retrieve locked instance %s" % self.op.instance_name
4452

    
4453
    _CheckNodeOnline(self, instance.primary_node)
4454

    
4455
    # check bridges existence
4456
    _CheckInstanceBridgesExist(self, instance)
4457

    
4458
  def Exec(self, feedback_fn):
4459
    """Reboot the instance.
4460

4461
    """
4462
    instance = self.instance
4463
    ignore_secondaries = self.op.ignore_secondaries
4464
    reboot_type = self.op.reboot_type
4465

    
4466
    node_current = instance.primary_node
4467

    
4468
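    # soft and hard reboots are delegated to the hypervisor on the primary
    # node; a full reboot is emulated below via an instance shutdown followed
    # by a fresh disk activation and start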
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4469
                       constants.INSTANCE_REBOOT_HARD]:
4470
      for disk in instance.disks:
4471
        self.cfg.SetDiskID(disk, node_current)
4472
      result = self.rpc.call_instance_reboot(node_current, instance,
4473
                                             reboot_type,
4474
                                             self.shutdown_timeout)
4475
      result.Raise("Could not reboot instance")
4476
    else:
4477
      result = self.rpc.call_instance_shutdown(node_current, instance,
4478
                                               self.shutdown_timeout)
4479
      result.Raise("Could not shutdown instance for full reboot")
4480
      _ShutdownInstanceDisks(self, instance)
4481
      _StartInstanceDisks(self, instance, ignore_secondaries)
4482
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4483
      msg = result.fail_msg
4484
      if msg:
4485
        _ShutdownInstanceDisks(self, instance)
4486
        raise errors.OpExecError("Could not start instance for"
4487
                                 " full reboot: %s" % msg)
4488

    
4489
    self.cfg.MarkInstanceUp(instance.name)
4490

    
4491

    
4492
class LUShutdownInstance(LogicalUnit):
4493
  """Shutdown an instance.
4494

4495
  """
4496
  HPATH = "instance-stop"
4497
  HTYPE = constants.HTYPE_INSTANCE
4498
  _OP_REQP = ["instance_name"]
4499
  REQ_BGL = False
4500

    
4501
  def CheckArguments(self):
4502
    """Check the arguments.
4503

4504
    """
4505
    self.timeout = getattr(self.op, "timeout",
4506
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
4507

    
4508
  def ExpandNames(self):
4509
    self._ExpandAndLockInstance()
4510

    
4511
  def BuildHooksEnv(self):
4512
    """Build hooks env.
4513

4514
    This runs on master, primary and secondary nodes of the instance.
4515

4516
    """
4517
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4518
    env["TIMEOUT"] = self.timeout
4519
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4520
    return env, nl, nl
4521

    
4522
  def CheckPrereq(self):
4523
    """Check prerequisites.
4524

4525
    This checks that the instance is in the cluster.
4526

4527
    """
4528
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4529
    assert self.instance is not None, \
4530
      "Cannot retrieve locked instance %s" % self.op.instance_name
4531
    _CheckNodeOnline(self, self.instance.primary_node)
4532

    
4533
  def Exec(self, feedback_fn):
4534
    """Shutdown the instance.
4535

4536
    """
4537
    instance = self.instance
4538
    node_current = instance.primary_node
4539
    timeout = self.timeout
4540
    self.cfg.MarkInstanceDown(instance.name)
4541
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4542
    msg = result.fail_msg
4543
    if msg:
4544
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4545

    
4546
    _ShutdownInstanceDisks(self, instance)
4547

    
4548

    
4549
class LUReinstallInstance(LogicalUnit):
4550
  """Reinstall an instance.
4551

4552
  """
4553
  HPATH = "instance-reinstall"
4554
  HTYPE = constants.HTYPE_INSTANCE
4555
  _OP_REQP = ["instance_name"]
4556
  REQ_BGL = False
4557

    
4558
  def ExpandNames(self):
4559
    self._ExpandAndLockInstance()
4560

    
4561
  def BuildHooksEnv(self):
4562
    """Build hooks env.
4563

4564
    This runs on master, primary and secondary nodes of the instance.
4565

4566
    """
4567
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4568
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4569
    return env, nl, nl
4570

    
4571
  def CheckPrereq(self):
4572
    """Check prerequisites.
4573

4574
    This checks that the instance is in the cluster and is not running.
4575

4576
    """
4577
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4578
    assert instance is not None, \
4579
      "Cannot retrieve locked instance %s" % self.op.instance_name
4580
    _CheckNodeOnline(self, instance.primary_node)
4581

    
4582
    if instance.disk_template == constants.DT_DISKLESS:
4583
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4584
                                 self.op.instance_name,
4585
                                 errors.ECODE_INVAL)
4586
    _CheckInstanceDown(self, instance, "cannot reinstall")
4587

    
4588
    self.op.os_type = getattr(self.op, "os_type", None)
4589
    self.op.force_variant = getattr(self.op, "force_variant", False)
4590
    if self.op.os_type is not None:
4591
      # OS verification
4592
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4593
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4594

    
4595
    self.instance = instance
4596

    
4597
  def Exec(self, feedback_fn):
4598
    """Reinstall the instance.
4599

4600
    """
4601
    inst = self.instance
4602

    
4603
    if self.op.os_type is not None:
4604
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4605
      inst.os = self.op.os_type
4606
      self.cfg.Update(inst, feedback_fn)
4607

    
4608
    _StartInstanceDisks(self, inst, None)
4609
    try:
4610
      feedback_fn("Running the instance OS create scripts...")
4611
      # FIXME: pass debug option from opcode to backend
4612
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4613
                                             self.op.debug_level)
4614
      result.Raise("Could not install OS for instance %s on node %s" %
4615
                   (inst.name, inst.primary_node))
4616
    finally:
4617
      _ShutdownInstanceDisks(self, inst)
4618

    
4619

    
4620
class LURecreateInstanceDisks(LogicalUnit):
4621
  """Recreate an instance's missing disks.
4622

4623
  """
4624
  HPATH = "instance-recreate-disks"
4625
  HTYPE = constants.HTYPE_INSTANCE
4626
  _OP_REQP = ["instance_name", "disks"]
4627
  REQ_BGL = False
4628

    
4629
  def CheckArguments(self):
4630
    """Check the arguments.
4631

4632
    """
4633
    if not isinstance(self.op.disks, list):
4634
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4635
    for item in self.op.disks:
4636
      if (not isinstance(item, int) or
4637
          item < 0):
4638
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4639
                                   str(item), errors.ECODE_INVAL)
4640

    
4641
  def ExpandNames(self):
4642
    self._ExpandAndLockInstance()
4643

    
4644
  def BuildHooksEnv(self):
4645
    """Build hooks env.
4646

4647
    This runs on master, primary and secondary nodes of the instance.
4648

4649
    """
4650
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4651
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4652
    return env, nl, nl
4653

    
4654
  def CheckPrereq(self):
4655
    """Check prerequisites.
4656

4657
    This checks that the instance is in the cluster and is not running.
4658

4659
    """
4660
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4661
    assert instance is not None, \
4662
      "Cannot retrieve locked instance %s" % self.op.instance_name
4663
    _CheckNodeOnline(self, instance.primary_node)
4664

    
4665
    if instance.disk_template == constants.DT_DISKLESS:
4666
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4667
                                 self.op.instance_name, errors.ECODE_INVAL)
4668
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4669

    
4670
    if not self.op.disks:
4671
      self.op.disks = range(len(instance.disks))
4672
    else:
4673
      for idx in self.op.disks:
4674
        if idx >= len(instance.disks):
4675
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4676
                                     errors.ECODE_INVAL)
4677

    
4678
    self.instance = instance
4679

    
4680
  def Exec(self, feedback_fn):
4681
    """Recreate the disks.
4682

4683
    """
4684
    to_skip = []
4685
    for idx, _ in enumerate(self.instance.disks):
4686
      if idx not in self.op.disks: # disk idx has not been passed in
4687
        to_skip.append(idx)
4688
        continue
4689

    
4690
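    # recreate all disks except those whose index was collected in to_skip
    # above (i.e. the ones not requested)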
    _CreateDisks(self, self.instance, to_skip=to_skip)
4691

    
4692

    
4693
class LURenameInstance(LogicalUnit):
4694
  """Rename an instance.
4695

4696
  """
4697
  HPATH = "instance-rename"
4698
  HTYPE = constants.HTYPE_INSTANCE
4699
  _OP_REQP = ["instance_name", "new_name"]
4700

    
4701
  def BuildHooksEnv(self):
4702
    """Build hooks env.
4703

4704
    This runs on master, primary and secondary nodes of the instance.
4705

4706
    """
4707
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4708
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4709
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4710
    return env, nl, nl
4711

    
4712
  def CheckPrereq(self):
4713
    """Check prerequisites.
4714

4715
    This checks that the instance is in the cluster and is not running.
4716

4717
    """
4718
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4719
                                                self.op.instance_name)
4720
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4721
    assert instance is not None
4722
    _CheckNodeOnline(self, instance.primary_node)
4723
    _CheckInstanceDown(self, instance, "cannot rename")
4724
    self.instance = instance
4725

    
4726
    # new name verification
4727
    name_info = utils.GetHostInfo(self.op.new_name)
4728

    
4729
    self.op.new_name = new_name = name_info.name
4730
    instance_list = self.cfg.GetInstanceList()
4731
    if new_name in instance_list:
4732
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4733
                                 new_name, errors.ECODE_EXISTS)
4734

    
4735
    if not getattr(self.op, "ignore_ip", False):
4736
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4737
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4738
                                   (name_info.ip, new_name),
4739
                                   errors.ECODE_NOTUNIQUE)
4740

    
4741

    
4742
  def Exec(self, feedback_fn):
4743
    """Reinstall the instance.
4744

4745
    """
4746
    inst = self.instance
4747
    old_name = inst.name
4748

    
4749
    if inst.disk_template == constants.DT_FILE:
4750
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4751

    
4752
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4753
    # Change the instance lock. This is definitely safe while we hold the BGL
4754
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4755
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4756

    
4757
    # re-read the instance from the configuration after rename
4758
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4759

    
4760
    if inst.disk_template == constants.DT_FILE:
4761
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4762
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4763
                                                     old_file_storage_dir,
4764
                                                     new_file_storage_dir)
4765
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4766
                   " (but the instance has been renamed in Ganeti)" %
4767
                   (inst.primary_node, old_file_storage_dir,
4768
                    new_file_storage_dir))
4769

    
4770
    _StartInstanceDisks(self, inst, None)
4771
    try:
4772
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4773
                                                 old_name, self.op.debug_level)
4774
      msg = result.fail_msg
4775
      if msg:
4776
        msg = ("Could not run OS rename script for instance %s on node %s"
4777
               " (but the instance has been renamed in Ganeti): %s" %
4778
               (inst.name, inst.primary_node, msg))
4779
        self.proc.LogWarning(msg)
4780
    finally:
4781
      _ShutdownInstanceDisks(self, inst)
4782

    
4783

    
4784
class LURemoveInstance(LogicalUnit):
4785
  """Remove an instance.
4786

4787
  """
4788
  HPATH = "instance-remove"
4789
  HTYPE = constants.HTYPE_INSTANCE
4790
  _OP_REQP = ["instance_name", "ignore_failures"]
4791
  REQ_BGL = False
4792

    
4793
  def CheckArguments(self):
4794
    """Check the arguments.
4795

4796
    """
4797
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4798
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
4799

    
4800
  def ExpandNames(self):
4801
    self._ExpandAndLockInstance()
4802
    self.needed_locks[locking.LEVEL_NODE] = []
4803
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4804

    
4805
  def DeclareLocks(self, level):
4806
    if level == locking.LEVEL_NODE:
4807
      self._LockInstancesNodes()
4808

    
4809
  def BuildHooksEnv(self):
4810
    """Build hooks env.
4811

4812
    This runs on master, primary and secondary nodes of the instance.
4813

4814
    """
4815
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4816
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4817
    nl = [self.cfg.GetMasterNode()]
4818
    nl_post = list(self.instance.all_nodes) + nl
4819
    return env, nl, nl_post
4820

    
4821
  def CheckPrereq(self):
4822
    """Check prerequisites.
4823

4824
    This checks that the instance is in the cluster.
4825

4826
    """
4827
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4828
    assert self.instance is not None, \
4829
      "Cannot retrieve locked instance %s" % self.op.instance_name
4830

    
4831
  def Exec(self, feedback_fn):
4832
    """Remove the instance.
4833

4834
    """
4835
    instance = self.instance
4836
    logging.info("Shutting down instance %s on node %s",
4837
                 instance.name, instance.primary_node)
4838

    
4839
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4840
                                             self.shutdown_timeout)
4841
    msg = result.fail_msg
4842
    if msg:
4843
      if self.op.ignore_failures:
4844
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4845
      else:
4846
        raise errors.OpExecError("Could not shutdown instance %s on"
4847
                                 " node %s: %s" %
4848
                                 (instance.name, instance.primary_node, msg))
4849

    
4850
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4851

    
4852

    
4853
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4854
  """Utility function to remove an instance.
4855

4856
  """
4857
  logging.info("Removing block devices for instance %s", instance.name)
4858

    
4859
  if not _RemoveDisks(lu, instance):
4860
    if not ignore_failures:
4861
      raise errors.OpExecError("Can't remove instance's disks")
4862
    feedback_fn("Warning: can't remove instance's disks")
4863

    
4864
  logging.info("Removing instance %s out of cluster config", instance.name)
4865

    
4866
  lu.cfg.RemoveInstance(instance.name)
4867

    
4868
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4869
    "Instance lock removal conflict"
4870

    
4871
  # Remove lock for the instance
4872
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4873

    
4874

    
4875
class LUQueryInstances(NoHooksLU):
4876
  """Logical unit for querying instances.
4877

4878
  """
4879
  # pylint: disable-msg=W0142
4880
  _OP_REQP = ["output_fields", "names", "use_locking"]
4881
  REQ_BGL = False
4882
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4883
                    "serial_no", "ctime", "mtime", "uuid"]
4884
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4885
                                    "admin_state",
4886
                                    "disk_template", "ip", "mac", "bridge",
4887
                                    "nic_mode", "nic_link",
4888
                                    "sda_size", "sdb_size", "vcpus", "tags",
4889
                                    "network_port", "beparams",
4890
                                    r"(disk)\.(size)/([0-9]+)",
4891
                                    r"(disk)\.(sizes)", "disk_usage",
4892
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4893
                                    r"(nic)\.(bridge)/([0-9]+)",
4894
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4895
                                    r"(disk|nic)\.(count)",
4896
                                    "hvparams",
4897
                                    ] + _SIMPLE_FIELDS +
4898
                                  ["hv/%s" % name
4899
                                   for name in constants.HVS_PARAMETERS
4900
                                   if name not in constants.HVC_GLOBALS] +
4901
                                  ["be/%s" % name
4902
                                   for name in constants.BES_PARAMETERS])
4903
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
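  # Examples of accepted output_fields values (illustrative only): simple
  # fields such as "name" or "status", parameter lookups such as "be/memory"
  # or "hv/kernel_path", and indexed items such as "nic.mac/0" or
  # "disk.size/1".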
4904

    
4905

    
4906
  def ExpandNames(self):
4907
    _CheckOutputFields(static=self._FIELDS_STATIC,
4908
                       dynamic=self._FIELDS_DYNAMIC,
4909
                       selected=self.op.output_fields)
4910

    
4911
    self.needed_locks = {}
4912
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4913
    self.share_locks[locking.LEVEL_NODE] = 1
4914

    
4915
    if self.op.names:
4916
      self.wanted = _GetWantedInstances(self, self.op.names)
4917
    else:
4918
      self.wanted = locking.ALL_SET
4919

    
4920
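    # live data is only gathered (in Exec) when at least one dynamic field was
    # requested; locks are only acquired when the caller asked for locking too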
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4921
    self.do_locking = self.do_node_query and self.op.use_locking
4922
    if self.do_locking:
4923
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4924
      self.needed_locks[locking.LEVEL_NODE] = []
4925
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4926

    
4927
  def DeclareLocks(self, level):
4928
    if level == locking.LEVEL_NODE and self.do_locking:
4929
      self._LockInstancesNodes()
4930

    
4931
  def CheckPrereq(self):
4932
    """Check prerequisites.
4933

4934
    """
4935
    pass
4936

    
4937
  def Exec(self, feedback_fn):
4938
    """Computes the list of nodes and their attributes.
4939

4940
    """
4941
    # pylint: disable-msg=R0912
4942
    # way too many branches here
4943
    all_info = self.cfg.GetAllInstancesInfo()
4944
    if self.wanted == locking.ALL_SET:
4945
      # caller didn't specify instance names, so ordering is not important
4946
      if self.do_locking:
4947
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4948
      else:
4949
        instance_names = all_info.keys()
4950
      instance_names = utils.NiceSort(instance_names)
4951
    else:
4952
      # caller did specify names, so we must keep the ordering
4953
      if self.do_locking:
4954
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4955
      else:
4956
        tgt_set = all_info.keys()
4957
      missing = set(self.wanted).difference(tgt_set)
4958
      if missing:
4959
        raise errors.OpExecError("Some instances were removed before"
4960
                                 " retrieving their data: %s" % missing)
4961
      instance_names = self.wanted
4962

    
4963
    instance_list = [all_info[iname] for iname in instance_names]
4964

    
4965
    # begin data gathering
4966

    
4967
    nodes = frozenset([inst.primary_node for inst in instance_list])
4968
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4969

    
4970
    bad_nodes = []
4971
    off_nodes = []
4972
    if self.do_node_query:
4973
      live_data = {}
4974
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4975
      for name in nodes:
4976
        result = node_data[name]
4977
        if result.offline:
4978
          # offline nodes will be in both lists
4979
          off_nodes.append(name)
4980
        if result.fail_msg:
4981
          bad_nodes.append(name)
4982
        else:
4983
          if result.payload:
4984
            live_data.update(result.payload)
4985
          # else no instance is alive
4986
    else:
4987
      live_data = dict([(name, {}) for name in instance_names])
4988

    
4989
    # end data gathering
4990

    
4991
    HVPREFIX = "hv/"
4992
    BEPREFIX = "be/"
4993
    output = []
4994
    cluster = self.cfg.GetClusterInfo()
4995
    for instance in instance_list:
4996
      iout = []
4997
      i_hv = cluster.FillHV(instance, skip_globals=True)
4998
      i_be = cluster.FillBE(instance)
4999
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5000
      for field in self.op.output_fields:
5001
        st_match = self._FIELDS_STATIC.Matches(field)
5002
        if field in self._SIMPLE_FIELDS:
5003
          val = getattr(instance, field)
5004
        elif field == "pnode":
5005
          val = instance.primary_node
5006
        elif field == "snodes":
5007
          val = list(instance.secondary_nodes)
5008
        elif field == "admin_state":
5009
          val = instance.admin_up
5010
        elif field == "oper_state":
5011
          if instance.primary_node in bad_nodes:
5012
            val = None
5013
          else:
5014
            val = bool(live_data.get(instance.name))
5015
        elif field == "status":
5016
          if instance.primary_node in off_nodes:
5017
            val = "ERROR_nodeoffline"
5018
          elif instance.primary_node in bad_nodes:
5019
            val = "ERROR_nodedown"
5020
          else:
5021
            running = bool(live_data.get(instance.name))
5022
            if running:
5023
              if instance.admin_up:
5024
                val = "running"
5025
              else:
5026
                val = "ERROR_up"
5027
            else:
5028
              if instance.admin_up:
5029
                val = "ERROR_down"
5030
              else:
5031
                val = "ADMIN_down"
5032
        elif field == "oper_ram":
5033
          if instance.primary_node in bad_nodes:
5034
            val = None
5035
          elif instance.name in live_data:
5036
            val = live_data[instance.name].get("memory", "?")
5037
          else:
5038
            val = "-"
5039
        elif field == "vcpus":
5040
          val = i_be[constants.BE_VCPUS]
5041
        elif field == "disk_template":
5042
          val = instance.disk_template
5043
        elif field == "ip":
5044
          if instance.nics:
5045
            val = instance.nics[0].ip
5046
          else:
5047
            val = None
5048
        elif field == "nic_mode":
5049
          if instance.nics:
5050
            val = i_nicp[0][constants.NIC_MODE]
5051
          else:
5052
            val = None
5053
        elif field == "nic_link":
5054
          if instance.nics:
5055
            val = i_nicp[0][constants.NIC_LINK]
5056
          else:
5057
            val = None
5058
        elif field == "bridge":
5059
          if (instance.nics and
5060
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5061
            val = i_nicp[0][constants.NIC_LINK]
5062
          else:
5063
            val = None
5064
        elif field == "mac":
5065
          if instance.nics:
5066
            val = instance.nics[0].mac
5067
          else:
5068
            val = None
5069
        elif field == "sda_size" or field == "sdb_size":
5070
          idx = ord(field[2]) - ord('a')
5071
          try:
5072
            val = instance.FindDisk(idx).size
5073
          except errors.OpPrereqError:
5074
            val = None
5075
        elif field == "disk_usage": # total disk usage per node
5076
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5077
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5078
        elif field == "tags":
5079
          val = list(instance.GetTags())
5080
        elif field == "hvparams":
5081
          val = i_hv
5082
        elif (field.startswith(HVPREFIX) and
5083
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5084
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5085
          val = i_hv.get(field[len(HVPREFIX):], None)
5086
        elif field == "beparams":
5087
          val = i_be
5088
        elif (field.startswith(BEPREFIX) and
5089
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5090
          val = i_be.get(field[len(BEPREFIX):], None)
5091
        elif st_match and st_match.groups():
5092
          # matches a variable list
5093
          st_groups = st_match.groups()
5094
          if st_groups and st_groups[0] == "disk":
5095
            if st_groups[1] == "count":
5096
              val = len(instance.disks)
5097
            elif st_groups[1] == "sizes":
5098
              val = [disk.size for disk in instance.disks]
5099
            elif st_groups[1] == "size":
5100
              try:
5101
                val = instance.FindDisk(st_groups[2]).size
5102
              except errors.OpPrereqError:
5103
                val = None
5104
            else:
5105
              assert False, "Unhandled disk parameter"
5106
          elif st_groups[0] == "nic":
5107
            if st_groups[1] == "count":
5108
              val = len(instance.nics)
5109
            elif st_groups[1] == "macs":
5110
              val = [nic.mac for nic in instance.nics]
5111
            elif st_groups[1] == "ips":
5112
              val = [nic.ip for nic in instance.nics]
5113
            elif st_groups[1] == "modes":
5114
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5115
            elif st_groups[1] == "links":
5116
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5117
            elif st_groups[1] == "bridges":
5118
              val = []
5119
              for nicp in i_nicp:
5120
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5121
                  val.append(nicp[constants.NIC_LINK])
5122
                else:
5123
                  val.append(None)
5124
            else:
5125
              # index-based item
5126
              nic_idx = int(st_groups[2])
5127
              if nic_idx >= len(instance.nics):
5128
                val = None
5129
              else:
5130
                if st_groups[1] == "mac":
5131
                  val = instance.nics[nic_idx].mac
5132
                elif st_groups[1] == "ip":
5133
                  val = instance.nics[nic_idx].ip
5134
                elif st_groups[1] == "mode":
5135
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5136
                elif st_groups[1] == "link":
5137
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5138
                elif st_groups[1] == "bridge":
5139
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5140
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5141
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5142
                  else:
5143
                    val = None
5144
                else:
5145
                  assert False, "Unhandled NIC parameter"
5146
          else:
5147
            assert False, ("Declared but unhandled variable parameter '%s'" %
5148
                           field)
5149
        else:
5150
          assert False, "Declared but unhandled parameter '%s'" % field
5151
        iout.append(val)
5152
      output.append(iout)
5153

    
5154
    return output
5155

    
5156

    
5157
class LUFailoverInstance(LogicalUnit):
5158
  """Failover an instance.
5159

5160
  """
5161
  HPATH = "instance-failover"
5162
  HTYPE = constants.HTYPE_INSTANCE
5163
  _OP_REQP = ["instance_name", "ignore_consistency"]
5164
  REQ_BGL = False
5165

    
5166
  def CheckArguments(self):
5167
    """Check the arguments.
5168

5169
    """
5170
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5171
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5172

    
5173
  def ExpandNames(self):
5174
    self._ExpandAndLockInstance()
5175
    self.needed_locks[locking.LEVEL_NODE] = []
5176
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5177

    
5178
  def DeclareLocks(self, level):
5179
    if level == locking.LEVEL_NODE:
5180
      self._LockInstancesNodes()
5181

    
5182
  def BuildHooksEnv(self):
5183
    """Build hooks env.
5184

5185
    This runs on master, primary and secondary nodes of the instance.
5186

5187
    """
5188
    instance = self.instance
5189
    source_node = instance.primary_node
5190
    target_node = instance.secondary_nodes[0]
5191
    env = {
5192
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5193
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5194
      "OLD_PRIMARY": source_node,
5195
      "OLD_SECONDARY": target_node,
5196
      "NEW_PRIMARY": target_node,
5197
      "NEW_SECONDARY": source_node,
5198
      }
5199
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5200
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5201
    nl_post = list(nl)
5202
    nl_post.append(source_node)
5203
    return env, nl, nl_post
5204

    
5205
  def CheckPrereq(self):
5206
    """Check prerequisites.
5207

5208
    This checks that the instance is in the cluster.
5209

5210
    """
5211
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5212
    assert self.instance is not None, \
5213
      "Cannot retrieve locked instance %s" % self.op.instance_name
5214

    
5215
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5216
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5217
      raise errors.OpPrereqError("Instance's disk layout is not"
5218
                                 " network mirrored, cannot failover.",
5219
                                 errors.ECODE_STATE)
5220

    
5221
    secondary_nodes = instance.secondary_nodes
5222
    if not secondary_nodes:
5223
      raise errors.ProgrammerError("no secondary node but using "
5224
                                   "a mirrored disk template")
5225

    
5226
    target_node = secondary_nodes[0]
5227
    _CheckNodeOnline(self, target_node)
5228
    _CheckNodeNotDrained(self, target_node)
5229
    if instance.admin_up:
5230
      # check memory requirements on the secondary node
5231
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5232
                           instance.name, bep[constants.BE_MEMORY],
5233
                           instance.hypervisor)
5234
    else:
5235
      self.LogInfo("Not checking memory on the secondary node as"
5236
                   " instance will not be started")
5237

    
5238
    # check bridge existance
5239
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5240

    
5241
  def Exec(self, feedback_fn):
5242
    """Failover an instance.
5243

5244
    The failover is done by shutting it down on its present node and
5245
    starting it on the secondary.
5246

5247
    """
5248
    instance = self.instance
5249

    
5250
    source_node = instance.primary_node
5251
    target_node = instance.secondary_nodes[0]
5252

    
5253
    if instance.admin_up:
5254
      feedback_fn("* checking disk consistency between source and target")
5255
      for dev in instance.disks:
5256
        # for drbd, these are drbd over lvm
5257
        if not _CheckDiskConsistency(self, dev, target_node, False):
5258
          if not self.op.ignore_consistency:
5259
            raise errors.OpExecError("Disk %s is degraded on target node,"
5260
                                     " aborting failover." % dev.iv_name)
5261
    else:
5262
      feedback_fn("* not checking disk consistency as instance is not running")
5263

    
5264
    feedback_fn("* shutting down instance on source node")
5265
    logging.info("Shutting down instance %s on node %s",
5266
                 instance.name, source_node)
5267

    
5268
    result = self.rpc.call_instance_shutdown(source_node, instance,
5269
                                             self.shutdown_timeout)
5270
    msg = result.fail_msg
5271
    if msg:
5272
      if self.op.ignore_consistency:
5273
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5274
                             " Proceeding anyway. Please make sure node"
5275
                             " %s is down. Error details: %s",
5276
                             instance.name, source_node, source_node, msg)
5277
      else:
5278
        raise errors.OpExecError("Could not shutdown instance %s on"
5279
                                 " node %s: %s" %
5280
                                 (instance.name, source_node, msg))
5281

    
5282
    feedback_fn("* deactivating the instance's disks on source node")
5283
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5284
      raise errors.OpExecError("Can't shut down the instance's disks.")
5285

    
5286
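    # from this point on, the instance is considered to live on the former
    # secondary node; persist that in the cluster configuration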
    instance.primary_node = target_node
5287
    # distribute new instance config to the other nodes
5288
    self.cfg.Update(instance, feedback_fn)
5289

    
5290
    # Only start the instance if it's marked as up
5291
    if instance.admin_up:
5292
      feedback_fn("* activating the instance's disks on target node")
5293
      logging.info("Starting instance %s on node %s",
5294
                   instance.name, target_node)
5295

    
5296
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5297
                                               ignore_secondaries=True)
5298
      if not disks_ok:
5299
        _ShutdownInstanceDisks(self, instance)
5300
        raise errors.OpExecError("Can't activate the instance's disks")
5301

    
5302
      feedback_fn("* starting the instance on the target node")
5303
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5304
      msg = result.fail_msg
5305
      if msg:
5306
        _ShutdownInstanceDisks(self, instance)
5307
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5308
                                 (instance.name, target_node, msg))
5309

    
5310

    
5311
class LUMigrateInstance(LogicalUnit):
5312
  """Migrate an instance.
5313

5314
  This is migration without shutting down, compared to the failover,
5315
  which is done with shutdown.
5316

5317
  """
5318
  HPATH = "instance-migrate"
5319
  HTYPE = constants.HTYPE_INSTANCE
5320
  _OP_REQP = ["instance_name", "live", "cleanup"]
5321

    
5322
  REQ_BGL = False
5323

    
5324
  def ExpandNames(self):
5325
    self._ExpandAndLockInstance()
5326

    
5327
    self.needed_locks[locking.LEVEL_NODE] = []
5328
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5329

    
5330
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5331
                                       self.op.live, self.op.cleanup)
5332
    self.tasklets = [self._migrater]
5333

    
5334
  def DeclareLocks(self, level):
5335
    if level == locking.LEVEL_NODE:
5336
      self._LockInstancesNodes()
5337

    
5338
  def BuildHooksEnv(self):
5339
    """Build hooks env.
5340

5341
    This runs on master, primary and secondary nodes of the instance.
5342

5343
    """
5344
    instance = self._migrater.instance
5345
    source_node = instance.primary_node
5346
    target_node = instance.secondary_nodes[0]
5347
    env = _BuildInstanceHookEnvByObject(self, instance)
5348
    env["MIGRATE_LIVE"] = self.op.live
5349
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5350
    env.update({
5351
        "OLD_PRIMARY": source_node,
5352
        "OLD_SECONDARY": target_node,
5353
        "NEW_PRIMARY": target_node,
5354
        "NEW_SECONDARY": source_node,
5355
        })
5356
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5357
    nl_post = list(nl)
5358
    nl_post.append(source_node)
5359
    return env, nl, nl_post
5360

    
5361

    
5362
class LUMoveInstance(LogicalUnit):
5363
  """Move an instance by data-copying.
5364

5365
  """
5366
  HPATH = "instance-move"
5367
  HTYPE = constants.HTYPE_INSTANCE
5368
  _OP_REQP = ["instance_name", "target_node"]
5369
  REQ_BGL = False
5370

    
5371
  def CheckArguments(self):
5372
    """Check the arguments.
5373

5374
    """
5375
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5376
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
5377

    
5378
  def ExpandNames(self):
5379
    self._ExpandAndLockInstance()
5380
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5381
    self.op.target_node = target_node
5382
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5383
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5384

    
5385
  def DeclareLocks(self, level):
5386
    if level == locking.LEVEL_NODE:
5387
      self._LockInstancesNodes(primary_only=True)
5388

    
5389
  def BuildHooksEnv(self):
5390
    """Build hooks env.
5391

5392
    This runs on master, primary and secondary nodes of the instance.
5393

5394
    """
5395
    env = {
5396
      "TARGET_NODE": self.op.target_node,
5397
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5398
      }
5399
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5400
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5401
                                       self.op.target_node]
5402
    return env, nl, nl
5403

    
5404
  def CheckPrereq(self):
5405
    """Check prerequisites.
5406

5407
    This checks that the instance is in the cluster.
5408

5409
    """
5410
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5411
    assert self.instance is not None, \
5412
      "Cannot retrieve locked instance %s" % self.op.instance_name
5413

    
5414
    node = self.cfg.GetNodeInfo(self.op.target_node)
5415
    assert node is not None, \
5416
      "Cannot retrieve locked node %s" % self.op.target_node
5417

    
5418
    self.target_node = target_node = node.name
5419

    
5420
    if target_node == instance.primary_node:
5421
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5422
                                 (instance.name, target_node),
5423
                                 errors.ECODE_STATE)
5424

    
5425
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5426

    
5427
    for idx, dsk in enumerate(instance.disks):
5428
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5429
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5430
                                   " cannot copy" % idx, errors.ECODE_STATE)
5431

    
5432
    _CheckNodeOnline(self, target_node)
5433
    _CheckNodeNotDrained(self, target_node)
5434

    
5435
    if instance.admin_up:
5436
      # check memory requirements on the target node
5437
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5438
                           instance.name, bep[constants.BE_MEMORY],
5439
                           instance.hypervisor)
5440
    else:
5441
      self.LogInfo("Not checking memory on the secondary node as"
5442
                   " instance will not be started")
5443

    
5444
    # check bridge existence
5445
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5446

    
5447
  def Exec(self, feedback_fn):
5448
    """Move an instance.
5449

5450
    The move is done by shutting it down on its present node, copying
5451
    the data over (slow) and starting it on the new node.
5452

5453
    """
5454
    instance = self.instance
5455

    
5456
    source_node = instance.primary_node
5457
    target_node = self.target_node
5458

    
5459
    self.LogInfo("Shutting down instance %s on source node %s",
5460
                 instance.name, source_node)
5461

    
5462
    result = self.rpc.call_instance_shutdown(source_node, instance,
5463
                                             self.shutdown_timeout)
5464
    msg = result.fail_msg
5465
    if msg:
5466
      if self.op.ignore_consistency:
5467
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5468
                             " Proceeding anyway. Please make sure node"
5469
                             " %s is down. Error details: %s",
5470
                             instance.name, source_node, source_node, msg)
5471
      else:
5472
        raise errors.OpExecError("Could not shutdown instance %s on"
5473
                                 " node %s: %s" %
5474
                                 (instance.name, source_node, msg))
5475

    
5476
    # create the target disks
5477
    try:
5478
      _CreateDisks(self, instance, target_node=target_node)
5479
    except errors.OpExecError:
5480
      self.LogWarning("Device creation failed, reverting...")
5481
      try:
5482
        _RemoveDisks(self, instance, target_node=target_node)
5483
      finally:
5484
        self.cfg.ReleaseDRBDMinors(instance.name)
5485
        raise
5486

    
5487
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5488

    
5489
    errs = []
5490
    # activate, get path, copy the data over
5491
    for idx, disk in enumerate(instance.disks):
5492
      self.LogInfo("Copying data for disk %d", idx)
5493
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5494
                                               instance.name, True)
5495
      if result.fail_msg:
5496
        self.LogWarning("Can't assemble newly created disk %d: %s",
5497
                        idx, result.fail_msg)
5498
        errs.append(result.fail_msg)
5499
        break
5500
      dev_path = result.payload
5501
      result = self.rpc.call_blockdev_export(source_node, disk,
5502
                                             target_node, dev_path,
5503
                                             cluster_name)
5504
      if result.fail_msg:
5505
        self.LogWarning("Can't copy data over for disk %d: %s",
5506
                        idx, result.fail_msg)
5507
        errs.append(result.fail_msg)
5508
        break
5509

    
5510
    if errs:
5511
      self.LogWarning("Some disks failed to copy, aborting")
5512
      try:
5513
        _RemoveDisks(self, instance, target_node=target_node)
5514
      finally:
5515
        self.cfg.ReleaseDRBDMinors(instance.name)
5516
        raise errors.OpExecError("Errors during disk copy: %s" %
5517
                                 (",".join(errs),))
5518

    
5519
    instance.primary_node = target_node
5520
    self.cfg.Update(instance, feedback_fn)
5521

    
5522
    self.LogInfo("Removing the disks on the original node")
5523
    _RemoveDisks(self, instance, target_node=source_node)
5524

    
5525
    # Only start the instance if it's marked as up
5526
    if instance.admin_up:
5527
      self.LogInfo("Starting instance %s on node %s",
5528
                   instance.name, target_node)
5529

    
5530
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5531
                                           ignore_secondaries=True)
5532
      if not disks_ok:
5533
        _ShutdownInstanceDisks(self, instance)
5534
        raise errors.OpExecError("Can't activate the instance's disks")
5535

    
5536
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5537
      msg = result.fail_msg
5538
      if msg:
5539
        _ShutdownInstanceDisks(self, instance)
5540
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5541
                                 (instance.name, target_node, msg))
5542

    
5543

    
5544
class LUMigrateNode(LogicalUnit):
5545
  """Migrate all instances from a node.
5546

5547
  """
5548
  HPATH = "node-migrate"
5549
  HTYPE = constants.HTYPE_NODE
5550
  _OP_REQP = ["node_name", "live"]
5551
  REQ_BGL = False
5552

    
5553
  def ExpandNames(self):
5554
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5555

    
5556
    self.needed_locks = {
5557
      locking.LEVEL_NODE: [self.op.node_name],
5558
      }
5559

    
5560
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5561

    
5562
    # Create tasklets for migrating instances for all instances on this node
5563
    names = []
5564
    tasklets = []
5565

    
5566
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5567
      logging.debug("Migrating instance %s", inst.name)
5568
      names.append(inst.name)
5569

    
5570
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5571

    
5572
    self.tasklets = tasklets
5573

    
5574
    # Declare instance locks
5575
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5576

    
5577
  def DeclareLocks(self, level):
5578
    if level == locking.LEVEL_NODE:
5579
      self._LockInstancesNodes()
5580

    
5581
  def BuildHooksEnv(self):
5582
    """Build hooks env.
5583

5584
    This runs on the master, the primary and all the secondaries.
5585

5586
    """
5587
    env = {
5588
      "NODE_NAME": self.op.node_name,
5589
      }
5590

    
5591
    nl = [self.cfg.GetMasterNode()]
5592

    
5593
    return (env, nl, nl)
5594

    
5595

    
5596
class TLMigrateInstance(Tasklet):
5597
  def __init__(self, lu, instance_name, live, cleanup):
5598
    """Initializes this class.
5599

5600
    """
5601
    Tasklet.__init__(self, lu)
5602

    
5603
    # Parameters
5604
    self.instance_name = instance_name
5605
    self.live = live
5606
    self.cleanup = cleanup
5607

    
5608
  def CheckPrereq(self):
5609
    """Check prerequisites.
5610

5611
    This checks that the instance is in the cluster.
5612

5613
    """
5614
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5615
    instance = self.cfg.GetInstanceInfo(instance_name)
5616
    assert instance is not None
5617

    
5618
    if instance.disk_template != constants.DT_DRBD8:
5619
      raise errors.OpPrereqError("Instance's disk layout is not"
5620
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5621

    
5622
    secondary_nodes = instance.secondary_nodes
5623
    if not secondary_nodes:
5624
      raise errors.ConfigurationError("No secondary node but using"
5625
                                      " drbd8 disk template")
5626

    
5627
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5628

    
5629
    target_node = secondary_nodes[0]
5630
    # check memory requirements on the secondary node
5631
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5632
                         instance.name, i_be[constants.BE_MEMORY],
5633
                         instance.hypervisor)
5634

    
5635
    # check bridge existence
5636
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5637

    
5638
    if not self.cleanup:
5639
      _CheckNodeNotDrained(self.lu, target_node)
5640
      result = self.rpc.call_instance_migratable(instance.primary_node,
5641
                                                 instance)
5642
      result.Raise("Can't migrate, please use failover",
5643
                   prereq=True, ecode=errors.ECODE_STATE)
5644

    
5645
    self.instance = instance
5646

    
5647
  def _WaitUntilSync(self):
5648
    """Poll with custom rpc for disk sync.
5649

5650
    This uses our own step-based rpc call.
5651

5652
    """
5653
    self.feedback_fn("* wait until resync is done")
5654
    all_done = False
5655
    while not all_done:
5656
      all_done = True
5657
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5658
                                            self.nodes_ip,
5659
                                            self.instance.disks)
5660
      min_percent = 100
5661
      for node, nres in result.items():
5662
        nres.Raise("Cannot resync disks on node %s" % node)
5663
        node_done, node_percent = nres.payload
5664
        all_done = all_done and node_done
5665
        if node_percent is not None:
5666
          min_percent = min(min_percent, node_percent)
5667
      if not all_done:
5668
        if min_percent < 100:
5669
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5670
        time.sleep(2)
5671

    
5672
  def _EnsureSecondary(self, node):
5673
    """Demote a node to secondary.
5674

5675
    """
5676
    self.feedback_fn("* switching node %s to secondary mode" % node)
5677

    
5678
    for dev in self.instance.disks:
5679
      self.cfg.SetDiskID(dev, node)
5680

    
5681
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5682
                                          self.instance.disks)
5683
    result.Raise("Cannot change disk to secondary on node %s" % node)
5684

    
5685
  def _GoStandalone(self):
5686
    """Disconnect from the network.
5687

5688
    """
5689
    self.feedback_fn("* changing into standalone mode")
5690
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5691
                                               self.instance.disks)
5692
    for node, nres in result.items():
5693
      nres.Raise("Cannot disconnect disks node %s" % node)
5694

    
5695
  def _GoReconnect(self, multimaster):
5696
    """Reconnect to the network.
5697

5698
    """
5699
    if multimaster:
5700
      msg = "dual-master"
5701
    else:
5702
      msg = "single-master"
5703
    self.feedback_fn("* changing disks into %s mode" % msg)
5704
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5705
                                           self.instance.disks,
5706
                                           self.instance.name, multimaster)
5707
    for node, nres in result.items():
5708
      nres.Raise("Cannot change disks config on node %s" % node)
5709
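  # _GoStandalone and _GoReconnect are always used as a pair when switching
  # DRBD modes: first disconnect the disks from the network, then re-attach
  # them in either single-master or dual-master mode (see _ExecCleanup and
  # _ExecMigration below).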

    
5710
  def _ExecCleanup(self):
5711
    """Try to cleanup after a failed migration.
5712

5713
    The cleanup is done by:
5714
      - check that the instance is running only on one node
5715
        (and update the config if needed)
5716
      - change disks on its secondary node to secondary
5717
      - wait until disks are fully synchronized
5718
      - disconnect from the network
5719
      - change disks into single-master mode
5720
      - wait again until disks are fully synchronized
5721

5722
    """
5723
    instance = self.instance
5724
    target_node = self.target_node
5725
    source_node = self.source_node
5726

    
5727
    # check running on only one node
5728
    self.feedback_fn("* checking where the instance actually runs"
5729
                     " (if this hangs, the hypervisor might be in"
5730
                     " a bad state)")
5731
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5732
    for node, result in ins_l.items():
5733
      result.Raise("Can't contact node %s" % node)
5734

    
5735
    runningon_source = instance.name in ins_l[source_node].payload
5736
    runningon_target = instance.name in ins_l[target_node].payload
5737

    
5738
    if runningon_source and runningon_target:
5739
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5740
                               " or the hypervisor is confused. You will have"
5741
                               " to ensure manually that it runs only on one"
5742
                               " and restart this operation.")
5743

    
5744
    if not (runningon_source or runningon_target):
5745
      raise errors.OpExecError("Instance does not seem to be running at all."
5746
                               " In this case, it's safer to repair by"
5747
                               " running 'gnt-instance stop' to ensure disk"
5748
                               " shutdown, and then restarting it.")
5749

    
5750
    if runningon_target:
5751
      # the migration has actually succeeded, we need to update the config
5752
      self.feedback_fn("* instance running on secondary node (%s),"
5753
                       " updating config" % target_node)
5754
      instance.primary_node = target_node
5755
      self.cfg.Update(instance, self.feedback_fn)
5756
      demoted_node = source_node
5757
    else:
5758
      self.feedback_fn("* instance confirmed to be running on its"
5759
                       " primary node (%s)" % source_node)
5760
      demoted_node = target_node
5761

    
5762
    self._EnsureSecondary(demoted_node)
5763
    try:
5764
      self._WaitUntilSync()
5765
    except errors.OpExecError:
5766
      # we ignore here errors, since if the device is standalone, it
5767
      # won't be able to sync
5768
      pass
5769
    self._GoStandalone()
5770
    self._GoReconnect(False)
5771
    self._WaitUntilSync()
5772

    
5773
    self.feedback_fn("* done")
5774

    
5775
  def _RevertDiskStatus(self):
5776
    """Try to revert the disk status after a failed migration.
5777

5778
    """
5779
    target_node = self.target_node
5780
    try:
5781
      self._EnsureSecondary(target_node)
5782
      self._GoStandalone()
5783
      self._GoReconnect(False)
5784
      self._WaitUntilSync()
5785
    except errors.OpExecError, err:
5786
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5787
                         " drives: error '%s'\n"
5788
                         "Please look and recover the instance status" %
5789
                         str(err))
5790

    
5791
  def _AbortMigration(self):
5792
    """Call the hypervisor code to abort a started migration.
5793

5794
    """
5795
    instance = self.instance
5796
    target_node = self.target_node
5797
    migration_info = self.migration_info
5798

    
5799
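    # The last argument of call_finalize_migration is the success flag:
    # False asks the target node to clean up after the aborted migration,
    # while _ExecMigration passes True to finalize a successful one.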
    abort_result = self.rpc.call_finalize_migration(target_node,
5800
                                                    instance,
5801
                                                    migration_info,
5802
                                                    False)
5803
    abort_msg = abort_result.fail_msg
5804
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
5809

    
5810
  def _ExecMigration(self):
5811
    """Migrate an instance.
5812

5813
    The migrate is done by:
5814
      - change the disks into dual-master mode
5815
      - wait until disks are fully synchronized again
5816
      - migrate the instance
5817
      - change disks on the new secondary node (the old primary) to secondary
5818
      - wait until disks are fully synchronized
5819
      - change disks into single-master mode
5820

5821
    """
5822
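    # The steps from the docstring map onto the helpers used below:
    # dual-master mode via _EnsureSecondary/_GoStandalone/_GoReconnect(True),
    # the actual move via call_instance_migrate, demotion of the old primary
    # via _EnsureSecondary(source_node), and the switch back to single-master
    # mode via _GoReconnect(False).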
    instance = self.instance
5823
    target_node = self.target_node
5824
    source_node = self.source_node
5825

    
5826
    self.feedback_fn("* checking disk consistency between source and target")
5827
    for dev in instance.disks:
5828
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5829
        raise errors.OpExecError("Disk %s is degraded or not fully"
5830
                                 " synchronized on target node,"
5831
                                 " aborting migrate." % dev.iv_name)
5832

    
5833
    # First get the migration information from the remote node
5834
    result = self.rpc.call_migration_info(source_node, instance)
5835
    msg = result.fail_msg
5836
    if msg:
5837
      log_err = ("Failed fetching source migration information from %s: %s" %
5838
                 (source_node, msg))
5839
      logging.error(log_err)
5840
      raise errors.OpExecError(log_err)
5841

    
5842
    self.migration_info = migration_info = result.payload
5843

    
5844
    # Then switch the disks to master/master mode
5845
    self._EnsureSecondary(target_node)
5846
    self._GoStandalone()
5847
    self._GoReconnect(True)
5848
    self._WaitUntilSync()
5849

    
5850
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5851
    result = self.rpc.call_accept_instance(target_node,
5852
                                           instance,
5853
                                           migration_info,
5854
                                           self.nodes_ip[target_node])
5855

    
5856
    msg = result.fail_msg
5857
    if msg:
5858
      logging.error("Instance pre-migration failed, trying to revert"
5859
                    " disk status: %s", msg)
5860
      self.feedback_fn("Pre-migration failed, aborting")
5861
      self._AbortMigration()
5862
      self._RevertDiskStatus()
5863
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5864
                               (instance.name, msg))
5865

    
5866
    self.feedback_fn("* migrating instance to %s" % target_node)
5867
    time.sleep(10)
5868
    result = self.rpc.call_instance_migrate(source_node, instance,
5869
                                            self.nodes_ip[target_node],
5870
                                            self.live)
5871
    msg = result.fail_msg
5872
    if msg:
5873
      logging.error("Instance migration failed, trying to revert"
5874
                    " disk status: %s", msg)
5875
      self.feedback_fn("Migration failed, aborting")
5876
      self._AbortMigration()
5877
      self._RevertDiskStatus()
5878
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5879
                               (instance.name, msg))
5880
    time.sleep(10)
5881

    
5882
    instance.primary_node = target_node
5883
    # distribute new instance config to the other nodes
5884
    self.cfg.Update(instance, self.feedback_fn)
5885

    
5886
    result = self.rpc.call_finalize_migration(target_node,
5887
                                              instance,
5888
                                              migration_info,
5889
                                              True)
5890
    msg = result.fail_msg
5891
    if msg:
5892
      logging.error("Instance migration succeeded, but finalization failed:"
5893
                    " %s", msg)
5894
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5895
                               msg)
5896

    
5897
    self._EnsureSecondary(source_node)
5898
    self._WaitUntilSync()
5899
    self._GoStandalone()
5900
    self._GoReconnect(False)
5901
    self._WaitUntilSync()
5902

    
5903
    self.feedback_fn("* done")
5904

    
5905
  def Exec(self, feedback_fn):
5906
    """Perform the migration.
5907

5908
    """
5909
    feedback_fn("Migrating instance %s" % self.instance.name)
5910

    
5911
    self.feedback_fn = feedback_fn
5912

    
5913
    self.source_node = self.instance.primary_node
5914
    self.target_node = self.instance.secondary_nodes[0]
5915
    self.all_nodes = [self.source_node, self.target_node]
5916
    self.nodes_ip = {
5917
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5918
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5919
      }
5920

    
5921
    if self.cleanup:
5922
      return self._ExecCleanup()
5923
    else:
5924
      return self._ExecMigration()
5925

    
5926

    
5927
def _CreateBlockDev(lu, node, instance, device, force_create,
5928
                    info, force_open):
5929
  """Create a tree of block devices on a given node.
5930

5931
  If this device type has to be created on secondaries, create it and
5932
  all its children.
5933

5934
  If not, just recurse to children keeping the same 'force' value.
5935

5936
  @param lu: the lu on whose behalf we execute
5937
  @param node: the node on which to create the device
5938
  @type instance: L{objects.Instance}
5939
  @param instance: the instance which owns the device
5940
  @type device: L{objects.Disk}
5941
  @param device: the device to create
5942
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
5955
  if device.CreateOnSecondary():
5956
    force_create = True
5957

    
5958
  if device.children:
5959
    for child in device.children:
5960
      _CreateBlockDev(lu, node, instance, child, force_create,
5961
                      info, force_open)
5962

    
5963
  if not force_create:
5964
    return
5965

    
5966
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5967
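# Note: _CreateBlockDev recurses depth-first, so a device's children are
# created before the device itself, and force_create is switched on for the
# whole subtree as soon as a device reporting CreateOnSecondary() is found.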

    
5968

    
5969
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5970
  """Create a single block device on a given node.
5971

5972
  This will not recurse over children of the device, so they must be
5973
  created in advance.
5974

5975
  @param lu: the lu on whose behalf we execute
5976
  @param node: the node on which to create the device
5977
  @type instance: L{objects.Instance}
5978
  @param instance: the instance which owns the device
5979
  @type device: L{objects.Disk}
5980
  @param device: the device to create
5981
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
5990
  lu.cfg.SetDiskID(device, node)
5991
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5992
                                       instance.name, force_open, info)
5993
  result.Raise("Can't create block device %s on"
5994
               " node %s for instance %s" % (device, node, instance.name))
5995
  if device.physical_id is None:
5996
    device.physical_id = result.payload
5997

    
5998

    
5999
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
6010
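# Example (illustrative): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns something like ["<uuid-a>.disk0", "<uuid-b>.disk1"], i.e. one
# freshly generated unique ID per requested suffix.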

    
6011

    
6012
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6013
                         p_minor, s_minor):
6014
  """Generate a drbd8 device complete with its children.
6015

6016
  """
6017
  port = lu.cfg.AllocatePort()
6018
  vgname = lu.cfg.GetVGName()
6019
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6020
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6021
                          logical_id=(vgname, names[0]))
6022
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6023
                          logical_id=(vgname, names[1]))
6024
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6025
                          logical_id=(primary, secondary, port,
6026
                                      p_minor, s_minor,
6027
                                      shared_secret),
6028
                          children=[dev_data, dev_meta],
6029
                          iv_name=iv_name)
6030
  return drbd_dev
6031
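# The result of _GenerateDRBD8Branch is a small device tree: an LD_DRBD8
# device of the requested size whose two LD_LV children are the data volume
# (names[0], full size) and a 128 MB metadata volume (names[1]), wired to
# the allocated DRBD port, minors and shared secret.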

    
6032

    
6033
def _GenerateDiskTemplate(lu, template_name,
6034
                          instance_name, primary_node,
6035
                          secondary_nodes, disk_info,
6036
                          file_storage_dir, file_driver,
6037
                          base_index):
6038
  """Generate the entire disk layout for a given template type.
6039

6040
  """
6041
  #TODO: compute space requirements
6042

    
6043
  vgname = lu.cfg.GetVGName()
6044
  disk_count = len(disk_info)
6045
  disks = []
6046
  if template_name == constants.DT_DISKLESS:
6047
    pass
6048
  elif template_name == constants.DT_PLAIN:
6049
    if len(secondary_nodes) != 0:
6050
      raise errors.ProgrammerError("Wrong template configuration")
6051

    
6052
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6053
                                      for i in range(disk_count)])
6054
    for idx, disk in enumerate(disk_info):
6055
      disk_index = idx + base_index
6056
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6057
                              logical_id=(vgname, names[idx]),
6058
                              iv_name="disk/%d" % disk_index,
6059
                              mode=disk["mode"])
6060
      disks.append(disk_dev)
6061
  elif template_name == constants.DT_DRBD8:
6062
    if len(secondary_nodes) != 1:
6063
      raise errors.ProgrammerError("Wrong template configuration")
6064
    remote_node = secondary_nodes[0]
6065
    minors = lu.cfg.AllocateDRBDMinor(
6066
      [primary_node, remote_node] * len(disk_info), instance_name)
6067

    
6068
    names = []
6069
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6070
                                               for i in range(disk_count)]):
6071
      names.append(lv_prefix + "_data")
6072
      names.append(lv_prefix + "_meta")
6073
    for idx, disk in enumerate(disk_info):
6074
      disk_index = idx + base_index
6075
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6076
                                      disk["size"], names[idx*2:idx*2+2],
6077
                                      "disk/%d" % disk_index,
6078
                                      minors[idx*2], minors[idx*2+1])
6079
      disk_dev.mode = disk["mode"]
6080
      disks.append(disk_dev)
6081
  elif template_name == constants.DT_FILE:
6082
    if len(secondary_nodes) != 0:
6083
      raise errors.ProgrammerError("Wrong template configuration")
6084

    
6085
    _RequireFileStorage()
6086

    
6087
    for idx, disk in enumerate(disk_info):
6088
      disk_index = idx + base_index
6089
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6090
                              iv_name="disk/%d" % disk_index,
6091
                              logical_id=(file_driver,
6092
                                          "%s/disk%d" % (file_storage_dir,
6093
                                                         disk_index)),
6094
                              mode=disk["mode"])
6095
      disks.append(disk_dev)
6096
  else:
6097
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6098
  return disks
6099
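
# Illustrative sketch only, not used by the code above: how the DRBD8 branch
# of _GenerateDiskTemplate pairs the generated LV prefixes into data/meta
# names. For disk index idx the pair is names[2 * idx] and names[2 * idx + 1],
# while minors[2 * idx] and minors[2 * idx + 1] are that disk's primary and
# secondary DRBD minors.
def _ExampleDrbd8NamePairs(lv_prefixes):
  """Return the (data, meta) LV name pairs for the given prefixes.

  """
  return [(prefix + "_data", prefix + "_meta") for prefix in lv_prefixes]
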

    
6100

    
6101
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
6106
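# Example (illustrative): for an instance named "inst1.example.com" the
# resulting tag is "originstname+inst1.example.com".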

    
6107

    
6108
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6109
  """Create all disks for an instance.
6110

6111
  This abstracts away some work from AddInstance.
6112

6113
  @type lu: L{LogicalUnit}
6114
  @param lu: the logical unit on whose behalf we execute
6115
  @type instance: L{objects.Instance}
6116
  @param instance: the instance whose disks we should create
6117
  @type to_skip: list
6118
  @param to_skip: list of indices to skip
6119
  @type target_node: string
6120
  @param target_node: if passed, overrides the target node for creation
6121
  @rtype: boolean
6122
  @return: the success of the creation
6123

6124
  """
6125
  info = _GetInstanceInfoText(instance)
6126
  if target_node is None:
6127
    pnode = instance.primary_node
6128
    all_nodes = instance.all_nodes
6129
  else:
6130
    pnode = target_node
6131
    all_nodes = [pnode]
6132

    
6133
  if instance.disk_template == constants.DT_FILE:
6134
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6135
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6136

    
6137
    result.Raise("Failed to create directory '%s' on"
6138
                 " node %s" % (file_storage_dir, pnode))
6139

    
6140
  # Note: this needs to be kept in sync with adding of disks in
6141
  # LUSetInstanceParams
6142
  for idx, device in enumerate(instance.disks):
6143
    if to_skip and idx in to_skip:
6144
      continue
6145
    logging.info("Creating volume %s for instance %s",
6146
                 device.iv_name, instance.name)
6147
    #HARDCODE
6148
    for node in all_nodes:
6149
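      # force creation (and opening) only on the primary node; on the other
      # nodes _CreateBlockDev creates just the device types that declare
      # CreateOnSecondary()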
      f_create = node == pnode
6150
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6151

    
6152

    
6153
def _RemoveDisks(lu, instance, target_node=None):
6154
  """Remove all disks for an instance.
6155

6156
  This abstracts away some work from `AddInstance()` and
6157
  `RemoveInstance()`. Note that in case some of the devices couldn't
6158
  be removed, the removal will continue with the other ones (compare
6159
  with `_CreateDisks()`).
6160

6161
  @type lu: L{LogicalUnit}
6162
  @param lu: the logical unit on whose behalf we execute
6163
  @type instance: L{objects.Instance}
6164
  @param instance: the instance whose disks we should remove
6165
  @type target_node: string
6166
  @param target_node: used to override the node on which to remove the disks
6167
  @rtype: boolean
6168
  @return: the success of the removal
6169

6170
  """
6171
  logging.info("Removing block devices for instance %s", instance.name)
6172

    
6173
  all_result = True
6174
  for device in instance.disks:
6175
    if target_node:
6176
      edata = [(target_node, device)]
6177
    else:
6178
      edata = device.ComputeNodeTree(instance.primary_node)
6179
    for node, disk in edata:
6180
      lu.cfg.SetDiskID(disk, node)
6181
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6182
      if msg:
6183
        lu.LogWarning("Could not remove block device %s on node %s,"
6184
                      " continuing anyway: %s", device.iv_name, node, msg)
6185
        all_result = False
6186

    
6187
  if instance.disk_template == constants.DT_FILE:
6188
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6189
    if target_node:
6190
      tgt = target_node
6191
    else:
6192
      tgt = instance.primary_node
6193
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6194
    if result.fail_msg:
6195
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6196
                    file_storage_dir, instance.primary_node, result.fail_msg)
6197
      all_result = False
6198

    
6199
  return all_result
6200

    
6201

    
6202
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
6220
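# Worked example (disk sizes in MB): for two disks of 1024 and 256,
# _ComputeDiskSize returns 1280 for DT_PLAIN and (1024 + 128) + (256 + 128) =
# 1536 for DT_DRBD8, while DT_DISKLESS and DT_FILE need no volume group
# space (None).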

    
6221

    
6222
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6247

    
6248

    
6249
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
6276

    
6277

    
6278
class LUCreateInstance(LogicalUnit):
6279
  """Create an instance.
6280

6281
  """
6282
  HPATH = "instance-add"
6283
  HTYPE = constants.HTYPE_INSTANCE
6284
  _OP_REQP = ["instance_name", "disks",
6285
              "mode", "start",
6286
              "wait_for_sync", "ip_check", "nics",
6287
              "hvparams", "beparams", "osparams"]
6288
  REQ_BGL = False
6289

    
6290
  def CheckArguments(self):
6291
    """Check arguments.
6292

6293
    """
6294
    # set optional parameters to none if they don't exist
6295
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
6296
                 "disk_template", "identify_defaults"]:
6297
      if not hasattr(self.op, attr):
6298
        setattr(self.op, attr, None)
6299

    
6300
    # do not require name_check to ease forward/backward compatibility
6301
    # for tools
6302
    if not hasattr(self.op, "name_check"):
6303
      self.op.name_check = True
6304
    if not hasattr(self.op, "no_install"):
6305
      self.op.no_install = False
6306
    if self.op.no_install and self.op.start:
6307
      self.LogInfo("No-installation mode selected, disabling startup")
6308
      self.op.start = False
6309
    # validate/normalize the instance name
6310
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6311
    if self.op.ip_check and not self.op.name_check:
6312
      # TODO: make the ip check more flexible and not depend on the name check
6313
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6314
                                 errors.ECODE_INVAL)
6315

    
6316
    # check nics' parameter names
6317
    for nic in self.op.nics:
6318
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6319

    
6320
    # check disks. parameter names and consistent adopt/no-adopt strategy
6321
    has_adopt = has_no_adopt = False
6322
    for disk in self.op.disks:
6323
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6324
      if "adopt" in disk:
6325
        has_adopt = True
6326
      else:
6327
        has_no_adopt = True
6328
    if has_adopt and has_no_adopt:
6329
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6330
                                 errors.ECODE_INVAL)
6331
    if has_adopt:
6332
      if self.op.disk_template != constants.DT_PLAIN:
6333
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6334
                                   " 'plain' disk template",
6335
                                   errors.ECODE_INVAL)
6336
      if self.op.iallocator is not None:
6337
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6338
                                   " iallocator script", errors.ECODE_INVAL)
6339
      if self.op.mode == constants.INSTANCE_IMPORT:
6340
        raise errors.OpPrereqError("Disk adoption not allowed for"
6341
                                   " instance import", errors.ECODE_INVAL)
6342

    
6343
    self.adopt_disks = has_adopt
6344

    
6345
    # verify creation mode
6346
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6347
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6348
                                 self.op.mode, errors.ECODE_INVAL)
6349

    
6350
    # instance name verification
6351
    if self.op.name_check:
6352
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6353
      self.op.instance_name = self.hostname1.name
6354
      # used in CheckPrereq for ip ping check
6355
      self.check_ip = self.hostname1.ip
6356
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6357
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6358
                                 errors.ECODE_INVAL)
6359
    else:
6360
      self.check_ip = None
6361

    
6362
    # file storage checks
6363
    if (self.op.file_driver and
6364
        not self.op.file_driver in constants.FILE_DRIVER):
6365
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6366
                                 self.op.file_driver, errors.ECODE_INVAL)
6367

    
6368
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6369
      raise errors.OpPrereqError("File storage directory path not absolute",
6370
                                 errors.ECODE_INVAL)
6371

    
6372
    ### Node/iallocator related checks
6373
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6374
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6375
                                 " node must be given",
6376
                                 errors.ECODE_INVAL)
6377

    
6378
    self._cds = _GetClusterDomainSecret()
6379

    
6380
    if self.op.mode == constants.INSTANCE_IMPORT:
6381
      # On import force_variant must be True, because if we forced it at
6382
      # initial install, our only chance when importing it back is that it
6383
      # works again!
6384
      self.op.force_variant = True
6385

    
6386
      if self.op.no_install:
6387
        self.LogInfo("No-installation mode has no effect during import")
6388

    
6389
    elif self.op.mode == constants.INSTANCE_CREATE:
6390
      if getattr(self.op, "os_type", None) is None:
6391
        raise errors.OpPrereqError("No guest OS specified",
6392
                                   errors.ECODE_INVAL)
6393
      self.op.force_variant = getattr(self.op, "force_variant", False)
6394
      if self.op.disk_template is None:
6395
        raise errors.OpPrereqError("No disk template specified",
6396
                                   errors.ECODE_INVAL)
6397

    
6398
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6399
      # Check handshake to ensure both clusters have the same domain secret
6400
      src_handshake = getattr(self.op, "source_handshake", None)
6401
      if not src_handshake:
6402
        raise errors.OpPrereqError("Missing source handshake",
6403
                                   errors.ECODE_INVAL)
6404

    
6405
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6406
                                                           src_handshake)
6407
      if errmsg:
6408
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6409
                                   errors.ECODE_INVAL)
6410

    
6411
      # Load and check source CA
6412
      self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6413
      if not self.source_x509_ca_pem:
6414
        raise errors.OpPrereqError("Missing source X509 CA",
6415
                                   errors.ECODE_INVAL)
6416

    
6417
      try:
6418
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6419
                                                    self._cds)
6420
      except OpenSSL.crypto.Error, err:
6421
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6422
                                   (err, ), errors.ECODE_INVAL)
6423

    
6424
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6425
      if errcode is not None:
6426
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6427
                                   errors.ECODE_INVAL)
6428

    
6429
      self.source_x509_ca = cert
6430

    
6431
      src_instance_name = getattr(self.op, "source_instance_name", None)
6432
      if not src_instance_name:
6433
        raise errors.OpPrereqError("Missing source instance name",
6434
                                   errors.ECODE_INVAL)
6435

    
6436
      self.source_instance_name = \
6437
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6438

    
6439
    else:
6440
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6441
                                 self.op.mode, errors.ECODE_INVAL)
6442

    
6443
  def ExpandNames(self):
6444
    """ExpandNames for CreateInstance.
6445

6446
    Figure out the right locks for instance creation.
6447

6448
    """
6449
    self.needed_locks = {}
6450

    
6451
    instance_name = self.op.instance_name
6452
    # this is just a preventive check, but someone might still add this
6453
    # instance in the meantime, and creation will fail at lock-add time
6454
    if instance_name in self.cfg.GetInstanceList():
6455
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6456
                                 instance_name, errors.ECODE_EXISTS)
6457

    
6458
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6459

    
6460
    if self.op.iallocator:
6461
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6462
    else:
6463
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6464
      nodelist = [self.op.pnode]
6465
      if self.op.snode is not None:
6466
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6467
        nodelist.append(self.op.snode)
6468
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6469

    
6470
    # in case of import lock the source node too
6471
    if self.op.mode == constants.INSTANCE_IMPORT:
6472
      src_node = getattr(self.op, "src_node", None)
6473
      src_path = getattr(self.op, "src_path", None)
6474

    
6475
      if src_path is None:
6476
        self.op.src_path = src_path = self.op.instance_name
6477

    
6478
      if src_node is None:
6479
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6480
        self.op.src_node = None
6481
        if os.path.isabs(src_path):
6482
          raise errors.OpPrereqError("Importing an instance from an absolute"
6483
                                     " path requires a source node option.",
6484
                                     errors.ECODE_INVAL)
6485
      else:
6486
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6487
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6488
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6489
        if not os.path.isabs(src_path):
6490
          self.op.src_path = src_path = \
6491
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6492

    
6493
  def _RunAllocator(self):
6494
    """Run the allocator based on input opcode.
6495

6496
    """
6497
    nics = [n.ToDict() for n in self.nics]
6498
    ial = IAllocator(self.cfg, self.rpc,
6499
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6500
                     name=self.op.instance_name,
6501
                     disk_template=self.op.disk_template,
6502
                     tags=[],
6503
                     os=self.op.os_type,
6504
                     vcpus=self.be_full[constants.BE_VCPUS],
6505
                     mem_size=self.be_full[constants.BE_MEMORY],
6506
                     disks=self.disks,
6507
                     nics=nics,
6508
                     hypervisor=self.op.hypervisor,
6509
                     )
6510

    
6511
    ial.Run(self.op.iallocator)
6512

    
6513
    if not ial.success:
6514
      raise errors.OpPrereqError("Can't compute nodes using"
6515
                                 " iallocator '%s': %s" %
6516
                                 (self.op.iallocator, ial.info),
6517
                                 errors.ECODE_NORES)
6518
    if len(ial.result) != ial.required_nodes:
6519
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6520
                                 " of nodes (%s), required %s" %
6521
                                 (self.op.iallocator, len(ial.result),
6522
                                  ial.required_nodes), errors.ECODE_FAULT)
6523
    self.op.pnode = ial.result[0]
6524
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6525
                 self.op.instance_name, self.op.iallocator,
6526
                 utils.CommaJoin(ial.result))
6527
    if ial.required_nodes == 2:
6528
      self.op.snode = ial.result[1]
6529

    
6530
  def BuildHooksEnv(self):
6531
    """Build hooks env.
6532

6533
    This runs on master, primary and secondary nodes of the instance.
6534

6535
    """
6536
    env = {
6537
      "ADD_MODE": self.op.mode,
6538
      }
6539
    if self.op.mode == constants.INSTANCE_IMPORT:
6540
      env["SRC_NODE"] = self.op.src_node
6541
      env["SRC_PATH"] = self.op.src_path
6542
      env["SRC_IMAGES"] = self.src_images
6543

    
6544
    env.update(_BuildInstanceHookEnv(
6545
      name=self.op.instance_name,
6546
      primary_node=self.op.pnode,
6547
      secondary_nodes=self.secondaries,
6548
      status=self.op.start,
6549
      os_type=self.op.os_type,
6550
      memory=self.be_full[constants.BE_MEMORY],
6551
      vcpus=self.be_full[constants.BE_VCPUS],
6552
      nics=_NICListToTuple(self, self.nics),
6553
      disk_template=self.op.disk_template,
6554
      disks=[(d["size"], d["mode"]) for d in self.disks],
6555
      bep=self.be_full,
6556
      hvp=self.hv_full,
6557
      hypervisor_name=self.op.hypervisor,
6558
    ))
6559

    
6560
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6561
          self.secondaries)
6562
    return env, nl, nl
6563

    
6564
  def _ReadExportInfo(self):
6565
    """Reads the export information from disk.
6566

6567
    It will override the opcode source node and path with the actual
6568
    information, if these two were not specified before.
6569

6570
    @return: the export information
6571

6572
    """
6573
    assert self.op.mode == constants.INSTANCE_IMPORT
6574

    
6575
    src_node = self.op.src_node
6576
    src_path = self.op.src_path
6577

    
6578
    if src_node is None:
6579
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6580
      exp_list = self.rpc.call_export_list(locked_nodes)
6581
      found = False
6582
      for node in exp_list:
6583
        if exp_list[node].fail_msg:
6584
          continue
6585
        if src_path in exp_list[node].payload:
6586
          found = True
6587
          self.op.src_node = src_node = node
6588
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6589
                                                       src_path)
6590
          break
6591
      if not found:
6592
        raise errors.OpPrereqError("No export found for relative path %s" %
6593
                                    src_path, errors.ECODE_INVAL)
6594

    
6595
    _CheckNodeOnline(self, src_node)
6596
    result = self.rpc.call_export_info(src_node, src_path)
6597
    result.Raise("No export or invalid export found in dir %s" % src_path)
6598

    
6599
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6600
    if not export_info.has_section(constants.INISECT_EXP):
6601
      raise errors.ProgrammerError("Corrupted export config",
6602
                                   errors.ECODE_ENVIRON)
6603

    
6604
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6605
    if (int(ei_version) != constants.EXPORT_VERSION):
6606
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6607
                                 (ei_version, constants.EXPORT_VERSION),
6608
                                 errors.ECODE_ENVIRON)
6609
    return export_info
6610

    
6611
  def _ReadExportParams(self, einfo):
6612
    """Use export parameters as defaults.
6613

6614
    In case the opcode doesn't specify (as in override) some instance
6615
    parameters, then try to use them from the export information, if
6616
    that declares them.
6617

6618
    """
6619
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6620

    
6621
    if self.op.disk_template is None:
6622
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6623
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6624
                                          "disk_template")
6625
      else:
6626
        raise errors.OpPrereqError("No disk template specified and the export"
6627
                                   " is missing the disk_template information",
6628
                                   errors.ECODE_INVAL)
6629

    
6630
    if not self.op.disks:
6631
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6632
        disks = []
6633
        # TODO: import the disk iv_name too
6634
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6635
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6636
          disks.append({"size": disk_sz})
6637
        self.op.disks = disks
6638
      else:
6639
        raise errors.OpPrereqError("No disk info specified and the export"
6640
                                   " is missing the disk information",
6641
                                   errors.ECODE_INVAL)
6642

    
6643
    if (not self.op.nics and
6644
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6645
      nics = []
6646
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6647
        ndict = {}
6648
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6649
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6650
          ndict[name] = v
6651
        nics.append(ndict)
6652
      self.op.nics = nics
6653

    
6654
    if (self.op.hypervisor is None and
6655
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6656
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6657
    if einfo.has_section(constants.INISECT_HYP):
6658
      # use the export parameters but do not override the ones
6659
      # specified by the user
6660
      for name, value in einfo.items(constants.INISECT_HYP):
6661
        if name not in self.op.hvparams:
6662
          self.op.hvparams[name] = value
6663

    
6664
    if einfo.has_section(constants.INISECT_BEP):
6665
      # use the parameters, without overriding
6666
      for name, value in einfo.items(constants.INISECT_BEP):
6667
        if name not in self.op.beparams:
6668
          self.op.beparams[name] = value
6669
    else:
6670
      # try to read the parameters old style, from the main section
6671
      for name in constants.BES_PARAMETERS:
6672
        if (name not in self.op.beparams and
6673
            einfo.has_option(constants.INISECT_INS, name)):
6674
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6675

    
6676
    if einfo.has_section(constants.INISECT_OSP):
6677
      # use the parameters, without overriding
6678
      for name, value in einfo.items(constants.INISECT_OSP):
6679
        if name not in self.op.osparams:
6680
          self.op.osparams[name] = value
6681

    
6682
  def _RevertToDefaults(self, cluster):
6683
    """Revert the instance parameters to the default values.
6684

6685
    """
6686
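    # Every parameter equal to the cluster-provided default is dropped from
    # the opcode, so the instance keeps tracking the cluster defaults instead
    # of storing an explicit copy; this is what the 'identify_defaults'
    # option requests (e.g. when importing an instance).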
    # hvparams
6687
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6688
    for name in self.op.hvparams.keys():
6689
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6690
        del self.op.hvparams[name]
6691
    # beparams
6692
    be_defs = cluster.SimpleFillBE({})
6693
    for name in self.op.beparams.keys():
6694
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6695
        del self.op.beparams[name]
6696
    # nic params
6697
    nic_defs = cluster.SimpleFillNIC({})
6698
    for nic in self.op.nics:
6699
      for name in constants.NICS_PARAMETERS:
6700
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6701
          del nic[name]
6702
    # osparams
6703
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6704
    for name in self.op.osparams.keys():
6705
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6706
        del self.op.osparams[name]
6707

    
6708
  def CheckPrereq(self):
6709
    """Check prerequisites.
6710

6711
    """
6712
    if self.op.mode == constants.INSTANCE_IMPORT:
6713
      export_info = self._ReadExportInfo()
6714
      self._ReadExportParams(export_info)
6715

    
6716
    _CheckDiskTemplate(self.op.disk_template)
6717

    
6718
    if (not self.cfg.GetVGName() and
6719
        self.op.disk_template not in constants.DTS_NOT_LVM):
6720
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6721
                                 " instances", errors.ECODE_STATE)
6722

    
6723
    if self.op.hypervisor is None:
6724
      self.op.hypervisor = self.cfg.GetHypervisorType()
6725

    
6726
    cluster = self.cfg.GetClusterInfo()
6727
    enabled_hvs = cluster.enabled_hypervisors
6728
    if self.op.hypervisor not in enabled_hvs:
6729
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6730
                                 " cluster (%s)" % (self.op.hypervisor,
6731
                                  ",".join(enabled_hvs)),
6732
                                 errors.ECODE_STATE)
6733

    
6734
    # check hypervisor parameter syntax (locally)
6735
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6736
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6737
                                      self.op.hvparams)
6738
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6739
    hv_type.CheckParameterSyntax(filled_hvp)
6740
    self.hv_full = filled_hvp
6741
    # check that we don't specify global parameters on an instance
6742
    _CheckGlobalHvParams(self.op.hvparams)
6743

    
6744
    # fill and remember the beparams dict
6745
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6746
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6747

    
6748
    # build os parameters
6749
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6750

    
6751
    # now that hvp/bep are in final format, let's reset to defaults,
6752
    # if told to do so
6753
    if self.op.identify_defaults:
6754
      self._RevertToDefaults(cluster)
6755

    
6756
    # NIC buildup
6757
    self.nics = []
6758
    for idx, nic in enumerate(self.op.nics):
6759
      nic_mode_req = nic.get("mode", None)
6760
      nic_mode = nic_mode_req
6761
      if nic_mode is None:
6762
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6763

    
6764
      # in routed mode, for the first nic, the default ip is 'auto'
6765
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6766
        default_ip_mode = constants.VALUE_AUTO
6767
      else:
6768
        default_ip_mode = constants.VALUE_NONE
6769

    
6770
      # ip validity checks
6771
      ip = nic.get("ip", default_ip_mode)
6772
      if ip is None or ip.lower() == constants.VALUE_NONE:
6773
        nic_ip = None
6774
      elif ip.lower() == constants.VALUE_AUTO:
6775
        if not self.op.name_check:
6776
          raise errors.OpPrereqError("IP address set to auto but name checks"
6777
                                     " have been skipped. Aborting.",
6778
                                     errors.ECODE_INVAL)
6779
        nic_ip = self.hostname1.ip
6780
      else:
6781
        if not utils.IsValidIP(ip):
6782
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6783
                                     " like a valid IP" % ip,
6784
                                     errors.ECODE_INVAL)
6785
        nic_ip = ip
6786

    
6787
      # TODO: check the ip address for uniqueness
6788
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6789
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6790
                                   errors.ECODE_INVAL)
6791

    
6792
      # MAC address verification
6793
      mac = nic.get("mac", constants.VALUE_AUTO)
6794
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6795
        mac = utils.NormalizeAndValidateMac(mac)
6796

    
6797
        try:
6798
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6799
        except errors.ReservationError:
6800
          raise errors.OpPrereqError("MAC address %s already in use"
6801
                                     " in cluster" % mac,
6802
                                     errors.ECODE_NOTUNIQUE)
6803

    
6804
      # bridge verification
6805
      bridge = nic.get("bridge", None)
6806
      link = nic.get("link", None)
6807
      if bridge and link:
6808
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6809
                                   " at the same time", errors.ECODE_INVAL)
6810
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6811
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6812
                                   errors.ECODE_INVAL)
6813
      elif bridge:
6814
        link = bridge
6815

    
6816
      nicparams = {}
6817
      if nic_mode_req:
6818
        nicparams[constants.NIC_MODE] = nic_mode_req
6819
      if link:
6820
        nicparams[constants.NIC_LINK] = link
6821

    
6822
      check_params = cluster.SimpleFillNIC(nicparams)
6823
      objects.NIC.CheckParameterSyntax(check_params)
6824
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6825

    
6826
    # disk checks/pre-build
6827
    self.disks = []
6828
    for disk in self.op.disks:
6829
      mode = disk.get("mode", constants.DISK_RDWR)
6830
      if mode not in constants.DISK_ACCESS_SET:
6831
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6832
                                   mode, errors.ECODE_INVAL)
6833
      size = disk.get("size", None)
6834
      if size is None:
6835
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6836
      try:
6837
        size = int(size)
6838
      except (TypeError, ValueError):
6839
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6840
                                   errors.ECODE_INVAL)
6841
      new_disk = {"size": size, "mode": mode}
6842
      if "adopt" in disk:
6843
        new_disk["adopt"] = disk["adopt"]
6844
      self.disks.append(new_disk)
6845

    
6846
    if self.op.mode == constants.INSTANCE_IMPORT:
6847

    
6848
      # Check that the new instance doesn't have less disks than the export
6849
      instance_disks = len(self.disks)
6850
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6851
      if instance_disks < export_disks:
6852
        raise errors.OpPrereqError("Not enough disks to import."
6853
                                   " (instance: %d, export: %d)" %
6854
                                   (instance_disks, export_disks),
6855
                                   errors.ECODE_INVAL)
6856

    
6857
      disk_images = []
6858
      for idx in range(export_disks):
6859
        option = 'disk%d_dump' % idx
6860
        if export_info.has_option(constants.INISECT_INS, option):
6861
          # FIXME: are the old os-es, disk sizes, etc. useful?
6862
          export_name = export_info.get(constants.INISECT_INS, option)
6863
          image = utils.PathJoin(self.op.src_path, export_name)
6864
          disk_images.append(image)
6865
        else:
6866
          disk_images.append(False)
6867

    
6868
      self.src_images = disk_images
6869

    
6870
      old_name = export_info.get(constants.INISECT_INS, 'name')
6871
      try:
6872
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6873
      except (TypeError, ValueError), err:
6874
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6875
                                   " an integer: %s" % str(err),
6876
                                   errors.ECODE_STATE)
6877
      if self.op.instance_name == old_name:
6878
        for idx, nic in enumerate(self.nics):
6879
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6880
            nic_mac_ini = 'nic%d_mac' % idx
6881
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6882

    
6883
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6884

    
6885
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6886
    if self.op.ip_check:
6887
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6888
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6889
                                   (self.check_ip, self.op.instance_name),
6890
                                   errors.ECODE_NOTUNIQUE)
6891

    
6892
    #### mac address generation
6893
    # By generating here the mac address both the allocator and the hooks get
6894
    # the real final mac address rather than the 'auto' or 'generate' value.
6895
    # There is a race condition between the generation and the instance object
6896
    # creation, which means that we know the mac is valid now, but we're not
6897
    # sure it will be when we actually add the instance. If things go bad
6898
    # adding the instance will abort because of a duplicate mac, and the
6899
    # creation job will fail.
6900
    for nic in self.nics:
6901
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6902
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6903

    
6904
    #### allocator run
6905

    
6906
    if self.op.iallocator is not None:
6907
      self._RunAllocator()
6908

    
6909
    #### node related checks
6910

    
6911
    # check primary node
6912
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6913
    assert self.pnode is not None, \
6914
      "Cannot retrieve locked node %s" % self.op.pnode
6915
    if pnode.offline:
6916
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6917
                                 pnode.name, errors.ECODE_STATE)
6918
    if pnode.drained:
6919
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6920
                                 pnode.name, errors.ECODE_STATE)
6921

    
6922
    self.secondaries = []
6923

    
6924
    # mirror node verification
6925
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6926
      if self.op.snode is None:
6927
        raise errors.OpPrereqError("The networked disk templates need"
6928
                                   " a mirror node", errors.ECODE_INVAL)
6929
      if self.op.snode == pnode.name:
6930
        raise errors.OpPrereqError("The secondary node cannot be the"
6931
                                   " primary node.", errors.ECODE_INVAL)
6932
      _CheckNodeOnline(self, self.op.snode)
6933
      _CheckNodeNotDrained(self, self.op.snode)
6934
      self.secondaries.append(self.op.snode)
6935

    
6936
    nodenames = [pnode.name] + self.secondaries
6937

    
6938
    req_size = _ComputeDiskSize(self.op.disk_template,
6939
                                self.disks)
6940

    
6941
    # Check lv size requirements, if not adopting
6942
    if req_size is not None and not self.adopt_disks:
6943
      _CheckNodesFreeDisk(self, nodenames, req_size)
6944

    
6945
    if self.adopt_disks: # instead, we must check the adoption data
6946
      all_lvs = set([i["adopt"] for i in self.disks])
6947
      if len(all_lvs) != len(self.disks):
6948
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6949
                                   errors.ECODE_INVAL)
6950
      for lv_name in all_lvs:
6951
        try:
6952
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6953
        except errors.ReservationError:
6954
          raise errors.OpPrereqError("LV named %s used by another instance" %
6955
                                     lv_name, errors.ECODE_NOTUNIQUE)
6956

    
6957
      node_lvs = self.rpc.call_lv_list([pnode.name],
6958
                                       self.cfg.GetVGName())[pnode.name]
6959
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6960
      node_lvs = node_lvs.payload
6961
      delta = all_lvs.difference(node_lvs.keys())
6962
      if delta:
6963
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6964
                                   utils.CommaJoin(delta),
6965
                                   errors.ECODE_INVAL)
6966
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6967
      if online_lvs:
6968
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6969
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6970
                                   errors.ECODE_STATE)
6971
      # update the size of disk based on what is found
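      # (whatever size may have been given in the request is overridden by the
      # size the node actually reports for the existing volume)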
6972
      for dsk in self.disks:
6973
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6974

    
6975
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6976

    
6977
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6978
    # check OS parameters (remotely)
6979
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6980

    
6981
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6982

    
6983
    # memory check on primary node
6984
    if self.op.start:
6985
      _CheckNodeFreeMemory(self, self.pnode.name,
6986
                           "creating instance %s" % self.op.instance_name,
6987
                           self.be_full[constants.BE_MEMORY],
6988
                           self.op.hypervisor)
6989

    
6990
    self.dry_run_result = list(nodenames)
6991

    
6992
  def Exec(self, feedback_fn):
6993
    """Create and add the instance to the cluster.
6994

6995
    """
6996
    instance = self.op.instance_name
6997
    pnode_name = self.pnode.name
6998

    
6999
    ht_kind = self.op.hypervisor
7000
    if ht_kind in constants.HTS_REQ_PORT:
7001
      network_port = self.cfg.AllocatePort()
7002
    else:
7003
      network_port = None
7004

    
7005
    if constants.ENABLE_FILE_STORAGE:
7006
      # this is needed because os.path.join does not accept None arguments
7007
      if self.op.file_storage_dir is None:
7008
        string_file_storage_dir = ""
7009
      else:
7010
        string_file_storage_dir = self.op.file_storage_dir
7011

    
7012
      # build the full file storage dir path
7013
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7014
                                        string_file_storage_dir, instance)
7015
    else:
7016
      file_storage_dir = ""
7017

    
7018
    disks = _GenerateDiskTemplate(self,
7019
                                  self.op.disk_template,
7020
                                  instance, pnode_name,
7021
                                  self.secondaries,
7022
                                  self.disks,
7023
                                  file_storage_dir,
7024
                                  self.op.file_driver,
7025
                                  0)
7026

    
7027
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7028
                            primary_node=pnode_name,
7029
                            nics=self.nics, disks=disks,
7030
                            disk_template=self.op.disk_template,
7031
                            admin_up=False,
7032
                            network_port=network_port,
7033
                            beparams=self.op.beparams,
7034
                            hvparams=self.op.hvparams,
7035
                            hypervisor=self.op.hypervisor,
7036
                            osparams=self.op.osparams,
7037
                            )
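    # note that the instance is created with admin_up=False; it is only flipped
    # to True (and the instance actually started) at the end of Exec when
    # self.op.start is set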
7038

    
7039
    if self.adopt_disks:
7040
      # rename LVs to the newly-generated names; we need to construct
7041
      # 'fake' LV disks with the old data, plus the new unique_id
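      # Illustrative example (made-up names): an LV given as {"adopt":
      # "my-volume"} on volume group xenvg is renamed on the node from
      # ('xenvg', 'my-volume') to the freshly generated logical_id computed
      # by _GenerateDiskTemplate above.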
7042
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7043
      rename_to = []
7044
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7045
        rename_to.append(t_dsk.logical_id)
7046
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7047
        self.cfg.SetDiskID(t_dsk, pnode_name)
7048
      result = self.rpc.call_blockdev_rename(pnode_name,
7049
                                             zip(tmp_disks, rename_to))
7050
      result.Raise("Failed to rename adoped LVs")
7051
    else:
7052
      feedback_fn("* creating instance disks...")
7053
      try:
7054
        _CreateDisks(self, iobj)
7055
      except errors.OpExecError:
7056
        self.LogWarning("Device creation failed, reverting...")
7057
        try:
7058
          _RemoveDisks(self, iobj)
7059
        finally:
7060
          self.cfg.ReleaseDRBDMinors(instance)
7061
          raise
7062

    
7063
    feedback_fn("adding instance %s to cluster config" % instance)
7064

    
7065
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7066

    
7067
    # Declare that we don't want to remove the instance lock anymore, as we've
7068
    # added the instance to the config
7069
    del self.remove_locks[locking.LEVEL_INSTANCE]
7070
    # Unlock all the nodes
7071
    if self.op.mode == constants.INSTANCE_IMPORT:
7072
      nodes_keep = [self.op.src_node]
7073
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7074
                       if node != self.op.src_node]
7075
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7076
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
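      # the source node lock is kept on purpose: the import below still needs
      # to read the disk images from that node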
7077
    else:
7078
      self.context.glm.release(locking.LEVEL_NODE)
7079
      del self.acquired_locks[locking.LEVEL_NODE]
7080

    
7081
    if self.op.wait_for_sync:
7082
      disk_abort = not _WaitForSync(self, iobj)
7083
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7084
      # make sure the disks are not degraded (still sync-ing is ok)
7085
      time.sleep(15)
7086
      feedback_fn("* checking mirrors status")
7087
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7088
    else:
7089
      disk_abort = False
7090

    
7091
    if disk_abort:
7092
      _RemoveDisks(self, iobj)
7093
      self.cfg.RemoveInstance(iobj.name)
7094
      # Make sure the instance lock gets removed
7095
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7096
      raise errors.OpExecError("There are some degraded disks for"
7097
                               " this instance")
7098

    
7099
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7100
      if self.op.mode == constants.INSTANCE_CREATE:
7101
        if not self.op.no_install:
7102
          feedback_fn("* running the instance OS create scripts...")
7103
          # FIXME: pass debug option from opcode to backend
7104
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7105
                                                 self.op.debug_level)
7106
          result.Raise("Could not add os for instance %s"
7107
                       " on node %s" % (instance, pnode_name))
7108

    
7109
      elif self.op.mode == constants.INSTANCE_IMPORT:
7110
        feedback_fn("* running the instance OS import scripts...")
7111

    
7112
        transfers = []
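        # one DiskTransfer per exported image: each one streams the source file
        # into the matching instance disk through the OS import script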
7113

    
7114
        for idx, image in enumerate(self.src_images):
7115
          if not image:
7116
            continue
7117

    
7118
          # FIXME: pass debug option from opcode to backend
7119
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7120
                                             constants.IEIO_FILE, (image, ),
7121
                                             constants.IEIO_SCRIPT,
7122
                                             (iobj.disks[idx], idx),
7123
                                             None)
7124
          transfers.append(dt)
7125

    
7126
        import_result = \
7127
          masterd.instance.TransferInstanceData(self, feedback_fn,
7128
                                                self.op.src_node, pnode_name,
7129
                                                self.pnode.secondary_ip,
7130
                                                iobj, transfers)
7131
        if not compat.all(import_result):
7132
          self.LogWarning("Some disks for instance %s on node %s were not"
7133
                          " imported successfully" % (instance, pnode_name))
7134

    
7135
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7136
        feedback_fn("* preparing remote import...")
7137
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7138
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7139

    
7140
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7141
                                                     self.source_x509_ca,
7142
                                                     self._cds, timeouts)
7143
        if not compat.all(disk_results):
7144
          # TODO: Should the instance still be started, even if some disks
7145
          # failed to import (valid for local imports, too)?
7146
          self.LogWarning("Some disks for instance %s on node %s were not"
7147
                          " imported successfully" % (instance, pnode_name))
7148

    
7149
        # Run rename script on newly imported instance
7150
        assert iobj.name == instance
7151
        feedback_fn("Running rename script for %s" % instance)
7152
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7153
                                                   self.source_instance_name,
7154
                                                   self.op.debug_level)
7155
        if result.fail_msg:
7156
          self.LogWarning("Failed to run rename script for %s on node"
7157
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7158

    
7159
      else:
7160
        # also checked in the prereq part
7161
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7162
                                     % self.op.mode)
7163

    
7164
    if self.op.start:
7165
      iobj.admin_up = True
7166
      self.cfg.Update(iobj, feedback_fn)
7167
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7168
      feedback_fn("* starting instance...")
7169
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7170
      result.Raise("Could not start instance")
7171

    
7172
    return list(iobj.all_nodes)
7173

    
7174

    
7175
class LUConnectConsole(NoHooksLU):
7176
  """Connect to an instance's console.
7177

7178
  This is somewhat special in that it returns the command line that
7179
  you need to run on the master node in order to connect to the
7180
  console.
7181

7182
  """
7183
  _OP_REQP = ["instance_name"]
7184
  REQ_BGL = False
7185

    
7186
  def ExpandNames(self):
7187
    self._ExpandAndLockInstance()
7188

    
7189
  def CheckPrereq(self):
7190
    """Check prerequisites.
7191

7192
    This checks that the instance is in the cluster.
7193

7194
    """
7195
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7196
    assert self.instance is not None, \
7197
      "Cannot retrieve locked instance %s" % self.op.instance_name
7198
    _CheckNodeOnline(self, self.instance.primary_node)
7199

    
7200
  def Exec(self, feedback_fn):
7201
    """Connect to the console of an instance
7202

7203
    """
7204
    instance = self.instance
7205
    node = instance.primary_node
7206

    
7207
    node_insts = self.rpc.call_instance_list([node],
7208
                                             [instance.hypervisor])[node]
7209
    node_insts.Raise("Can't get node information from %s" % node)
7210

    
7211
    if instance.name not in node_insts.payload:
7212
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7213

    
7214
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7215

    
7216
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7217
    cluster = self.cfg.GetClusterInfo()
7218
    # beparams and hvparams are passed separately, to avoid editing the
7219
    # instance and then saving the defaults in the instance itself.
7220
    hvparams = cluster.FillHV(instance)
7221
    beparams = cluster.FillBE(instance)
7222
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7223

    
7224
    # build ssh cmdline
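    # (batch=True avoids interactive password prompts and tty=True allocates a
    # pseudo-terminal, so the returned command gives a usable console session)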
7225
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7226

    
7227

    
7228
class LUReplaceDisks(LogicalUnit):
7229
  """Replace the disks of an instance.
7230

7231
  """
7232
  HPATH = "mirrors-replace"
7233
  HTYPE = constants.HTYPE_INSTANCE
7234
  _OP_REQP = ["instance_name", "mode", "disks"]
7235
  REQ_BGL = False
7236

    
7237
  def CheckArguments(self):
7238
    if not hasattr(self.op, "remote_node"):
7239
      self.op.remote_node = None
7240
    if not hasattr(self.op, "iallocator"):
7241
      self.op.iallocator = None
7242
    if not hasattr(self.op, "early_release"):
7243
      self.op.early_release = False
7244

    
7245
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7246
                                  self.op.iallocator)
7247

    
7248
  def ExpandNames(self):
7249
    self._ExpandAndLockInstance()
7250

    
7251
    if self.op.iallocator is not None:
7252
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7253

    
7254
    elif self.op.remote_node is not None:
7255
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7256
      self.op.remote_node = remote_node
7257

    
7258
      # Warning: do not remove the locking of the new secondary here
7259
      # unless DRBD8.AddChildren is changed to work in parallel;
7260
      # currently it doesn't since parallel invocations of
7261
      # FindUnusedMinor will conflict
7262
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7263
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7264

    
7265
    else:
7266
      self.needed_locks[locking.LEVEL_NODE] = []
7267
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7268

    
7269
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7270
                                   self.op.iallocator, self.op.remote_node,
7271
                                   self.op.disks, False, self.op.early_release)
7272

    
7273
    self.tasklets = [self.replacer]
7274

    
7275
  def DeclareLocks(self, level):
7276
    # If we're not already locking all nodes in the set we have to declare the
7277
    # instance's primary/secondary nodes.
7278
    if (level == locking.LEVEL_NODE and
7279
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7280
      self._LockInstancesNodes()
7281

    
7282
  def BuildHooksEnv(self):
7283
    """Build hooks env.
7284

7285
    This runs on the master, the primary and all the secondaries.
7286

7287
    """
7288
    instance = self.replacer.instance
7289
    env = {
7290
      "MODE": self.op.mode,
7291
      "NEW_SECONDARY": self.op.remote_node,
7292
      "OLD_SECONDARY": instance.secondary_nodes[0],
7293
      }
7294
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7295
    nl = [
7296
      self.cfg.GetMasterNode(),
7297
      instance.primary_node,
7298
      ]
7299
    if self.op.remote_node is not None:
7300
      nl.append(self.op.remote_node)
7301
    return env, nl, nl
7302

    
7303

    
7304
class LUEvacuateNode(LogicalUnit):
7305
  """Relocate the secondary instances from a node.
7306

7307
  """
7308
  HPATH = "node-evacuate"
7309
  HTYPE = constants.HTYPE_NODE
7310
  _OP_REQP = ["node_name"]
7311
  REQ_BGL = False
7312

    
7313
  def CheckArguments(self):
7314
    if not hasattr(self.op, "remote_node"):
7315
      self.op.remote_node = None
7316
    if not hasattr(self.op, "iallocator"):
7317
      self.op.iallocator = None
7318
    if not hasattr(self.op, "early_release"):
7319
      self.op.early_release = False
7320

    
7321
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7322
                                  self.op.remote_node,
7323
                                  self.op.iallocator)
7324

    
7325
  def ExpandNames(self):
7326
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7327

    
7328
    self.needed_locks = {}
7329

    
7330
    # Declare node locks
7331
    if self.op.iallocator is not None:
7332
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7333

    
7334
    elif self.op.remote_node is not None:
7335
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7336

    
7337
      # Warning: do not remove the locking of the new secondary here
7338
      # unless DRBD8.AddChildren is changed to work in parallel;
7339
      # currently it doesn't since parallel invocations of
7340
      # FindUnusedMinor will conflict
7341
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7342
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7343

    
7344
    else:
7345
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7346

    
7347
    # Create tasklets for replacing disks for all secondary instances on this
7348
    # node
7349
    names = []
7350
    tasklets = []
7351

    
7352
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7353
      logging.debug("Replacing disks for instance %s", inst.name)
7354
      names.append(inst.name)
7355

    
7356
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7357
                                self.op.iallocator, self.op.remote_node, [],
7358
                                True, self.op.early_release)
7359
      tasklets.append(replacer)
7360

    
7361
    self.tasklets = tasklets
7362
    self.instance_names = names
7363

    
7364
    # Declare instance locks
7365
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7366

    
7367
  def DeclareLocks(self, level):
7368
    # If we're not already locking all nodes in the set we have to declare the
7369
    # instance's primary/secondary nodes.
7370
    if (level == locking.LEVEL_NODE and
7371
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7372
      self._LockInstancesNodes()
7373

    
7374
  def BuildHooksEnv(self):
7375
    """Build hooks env.
7376

7377
    This runs on the master, the primary and all the secondaries.
7378

7379
    """
7380
    env = {
7381
      "NODE_NAME": self.op.node_name,
7382
      }
7383

    
7384
    nl = [self.cfg.GetMasterNode()]
7385

    
7386
    if self.op.remote_node is not None:
7387
      env["NEW_SECONDARY"] = self.op.remote_node
7388
      nl.append(self.op.remote_node)
7389

    
7390
    return (env, nl, nl)
7391

    
7392

    
7393
class TLReplaceDisks(Tasklet):
7394
  """Replaces disks for an instance.
7395

7396
  Note: Locking is not within the scope of this class.
7397

7398
  """
7399
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7400
               disks, delay_iallocator, early_release):
7401
    """Initializes this class.
7402

7403
    """
7404
    Tasklet.__init__(self, lu)
7405

    
7406
    # Parameters
7407
    self.instance_name = instance_name
7408
    self.mode = mode
7409
    self.iallocator_name = iallocator_name
7410
    self.remote_node = remote_node
7411
    self.disks = disks
7412
    self.delay_iallocator = delay_iallocator
7413
    self.early_release = early_release
7414

    
7415
    # Runtime data
7416
    self.instance = None
7417
    self.new_node = None
7418
    self.target_node = None
7419
    self.other_node = None
7420
    self.remote_node_info = None
7421
    self.node_secondary_ip = None
7422

    
7423
  @staticmethod
7424
  def CheckArguments(mode, remote_node, iallocator):
7425
    """Helper function for users of this class.
7426

7427
    """
7428
    # check for valid parameter combination
7429
    if mode == constants.REPLACE_DISK_CHG:
7430
      if remote_node is None and iallocator is None:
7431
        raise errors.OpPrereqError("When changing the secondary either an"
7432
                                   " iallocator script must be used or the"
7433
                                   " new node given", errors.ECODE_INVAL)
7434

    
7435
      if remote_node is not None and iallocator is not None:
7436
        raise errors.OpPrereqError("Give either the iallocator or the new"
7437
                                   " secondary, not both", errors.ECODE_INVAL)
7438

    
7439
    elif remote_node is not None or iallocator is not None:
7440
      # Not replacing the secondary
7441
      raise errors.OpPrereqError("The iallocator and new node options can"
7442
                                 " only be used when changing the"
7443
                                 " secondary node", errors.ECODE_INVAL)
7444

    
7445
  @staticmethod
7446
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7447
    """Compute a new secondary node using an IAllocator.
7448

7449
    """
7450
    ial = IAllocator(lu.cfg, lu.rpc,
7451
                     mode=constants.IALLOCATOR_MODE_RELOC,
7452
                     name=instance_name,
7453
                     relocate_from=relocate_from)
7454

    
7455
    ial.Run(iallocator_name)
7456

    
7457
    if not ial.success:
7458
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7459
                                 " %s" % (iallocator_name, ial.info),
7460
                                 errors.ECODE_NORES)
7461

    
7462
    if len(ial.result) != ial.required_nodes:
7463
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7464
                                 " of nodes (%s), required %s" %
7465
                                 (iallocator_name,
7466
                                  len(ial.result), ial.required_nodes),
7467
                                 errors.ECODE_FAULT)
7468

    
7469
    remote_node_name = ial.result[0]
7470

    
7471
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7472
               instance_name, remote_node_name)
7473

    
7474
    return remote_node_name
7475

    
7476
  def _FindFaultyDisks(self, node_name):
7477
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7478
                                    node_name, True)
7479

    
7480
  def CheckPrereq(self):
7481
    """Check prerequisites.
7482

7483
    This checks that the instance is in the cluster.
7484

7485
    """
7486
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7487
    assert instance is not None, \
7488
      "Cannot retrieve locked instance %s" % self.instance_name
7489

    
7490
    if instance.disk_template != constants.DT_DRBD8:
7491
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7492
                                 " instances", errors.ECODE_INVAL)
7493

    
7494
    if len(instance.secondary_nodes) != 1:
7495
      raise errors.OpPrereqError("The instance has a strange layout,"
7496
                                 " expected one secondary but found %d" %
7497
                                 len(instance.secondary_nodes),
7498
                                 errors.ECODE_FAULT)
7499

    
7500
    if not self.delay_iallocator:
7501
      self._CheckPrereq2()
7502

    
7503
  def _CheckPrereq2(self):
7504
    """Check prerequisites, second part.
7505

7506
    This function should always be part of CheckPrereq. It was separated and is
7507
    now called from Exec because, during node evacuation, the iallocator would
7508
    otherwise only be given an unmodified cluster model, one that does not take
7509
    the planned changes into account.
7510

7511
    """
7512
    instance = self.instance
7513
    secondary_node = instance.secondary_nodes[0]
7514

    
7515
    if self.iallocator_name is None:
7516
      remote_node = self.remote_node
7517
    else:
7518
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7519
                                       instance.name, instance.secondary_nodes)
7520

    
7521
    if remote_node is not None:
7522
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7523
      assert self.remote_node_info is not None, \
7524
        "Cannot retrieve locked node %s" % remote_node
7525
    else:
7526
      self.remote_node_info = None
7527

    
7528
    if remote_node == self.instance.primary_node:
7529
      raise errors.OpPrereqError("The specified node is the primary node of"
7530
                                 " the instance.", errors.ECODE_INVAL)
7531

    
7532
    if remote_node == secondary_node:
7533
      raise errors.OpPrereqError("The specified node is already the"
7534
                                 " secondary node of the instance.",
7535
                                 errors.ECODE_INVAL)
7536

    
7537
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7538
                                    constants.REPLACE_DISK_CHG):
7539
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7540
                                 errors.ECODE_INVAL)
7541

    
7542
    if self.mode == constants.REPLACE_DISK_AUTO:
7543
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7544
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7545

    
7546
      if faulty_primary and faulty_secondary:
7547
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7548
                                   " one node and can not be repaired"
7549
                                   " automatically" % self.instance_name,
7550
                                   errors.ECODE_STATE)
7551

    
7552
      if faulty_primary:
7553
        self.disks = faulty_primary
7554
        self.target_node = instance.primary_node
7555
        self.other_node = secondary_node
7556
        check_nodes = [self.target_node, self.other_node]
7557
      elif faulty_secondary:
7558
        self.disks = faulty_secondary
7559
        self.target_node = secondary_node
7560
        self.other_node = instance.primary_node
7561
        check_nodes = [self.target_node, self.other_node]
7562
      else:
7563
        self.disks = []
7564
        check_nodes = []
7565

    
7566
    else:
7567
      # Non-automatic modes
7568
      if self.mode == constants.REPLACE_DISK_PRI:
7569
        self.target_node = instance.primary_node
7570
        self.other_node = secondary_node
7571
        check_nodes = [self.target_node, self.other_node]
7572

    
7573
      elif self.mode == constants.REPLACE_DISK_SEC:
7574
        self.target_node = secondary_node
7575
        self.other_node = instance.primary_node
7576
        check_nodes = [self.target_node, self.other_node]
7577

    
7578
      elif self.mode == constants.REPLACE_DISK_CHG:
7579
        self.new_node = remote_node
7580
        self.other_node = instance.primary_node
7581
        self.target_node = secondary_node
7582
        check_nodes = [self.new_node, self.other_node]
7583

    
7584
        _CheckNodeNotDrained(self.lu, remote_node)
7585

    
7586
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7587
        assert old_node_info is not None
7588
        if old_node_info.offline and not self.early_release:
7589
          # doesn't make sense to delay the release
7590
          self.early_release = True
7591
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7592
                          " early-release mode", secondary_node)
7593

    
7594
      else:
7595
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7596
                                     self.mode)
7597

    
7598
      # If not specified all disks should be replaced
7599
      if not self.disks:
7600
        self.disks = range(len(self.instance.disks))
7601

    
7602
    for node in check_nodes:
7603
      _CheckNodeOnline(self.lu, node)
7604

    
7605
    # Check whether disks are valid
7606
    for disk_idx in self.disks:
7607
      instance.FindDisk(disk_idx)
7608

    
7609
    # Get secondary node IP addresses
7610
    node_2nd_ip = {}
7611

    
7612
    for node_name in [self.target_node, self.other_node, self.new_node]:
7613
      if node_name is not None:
7614
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7615

    
7616
    self.node_secondary_ip = node_2nd_ip
7617

    
7618
  def Exec(self, feedback_fn):
7619
    """Execute disk replacement.
7620

7621
    This dispatches the disk replacement to the appropriate handler.
7622

7623
    """
7624
    if self.delay_iallocator:
7625
      self._CheckPrereq2()
7626

    
7627
    if not self.disks:
7628
      feedback_fn("No disks need replacement")
7629
      return
7630

    
7631
    feedback_fn("Replacing disk(s) %s for %s" %
7632
                (utils.CommaJoin(self.disks), self.instance.name))
7633

    
7634
    activate_disks = (not self.instance.admin_up)
7635

    
7636
    # Activate the instance disks if we're replacing them on a down instance
7637
    if activate_disks:
7638
      _StartInstanceDisks(self.lu, self.instance, True)
7639

    
7640
    try:
7641
      # Should we replace the secondary node?
7642
      if self.new_node is not None:
7643
        fn = self._ExecDrbd8Secondary
7644
      else:
7645
        fn = self._ExecDrbd8DiskOnly
7646

    
7647
      return fn(feedback_fn)
7648

    
7649
    finally:
7650
      # Deactivate the instance disks if we're replacing them on a
7651
      # down instance
7652
      if activate_disks:
7653
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7654

    
7655
  def _CheckVolumeGroup(self, nodes):
7656
    self.lu.LogInfo("Checking volume groups")
7657

    
7658
    vgname = self.cfg.GetVGName()
7659

    
7660
    # Make sure volume group exists on all involved nodes
7661
    results = self.rpc.call_vg_list(nodes)
7662
    if not results:
7663
      raise errors.OpExecError("Can't list volume groups on the nodes")
7664

    
7665
    for node in nodes:
7666
      res = results[node]
7667
      res.Raise("Error checking node %s" % node)
7668
      if vgname not in res.payload:
7669
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7670
                                 (vgname, node))
7671

    
7672
  def _CheckDisksExistence(self, nodes):
7673
    # Check disk existence
7674
    for idx, dev in enumerate(self.instance.disks):
7675
      if idx not in self.disks:
7676
        continue
7677

    
7678
      for node in nodes:
7679
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7680
        self.cfg.SetDiskID(dev, node)
7681

    
7682
        result = self.rpc.call_blockdev_find(node, dev)
7683

    
7684
        msg = result.fail_msg
7685
        if msg or not result.payload:
7686
          if not msg:
7687
            msg = "disk not found"
7688
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7689
                                   (idx, node, msg))
7690

    
7691
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7692
    for idx, dev in enumerate(self.instance.disks):
7693
      if idx not in self.disks:
7694
        continue
7695

    
7696
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7697
                      (idx, node_name))
7698

    
7699
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7700
                                   ldisk=ldisk):
7701
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7702
                                 " replace disks for instance %s" %
7703
                                 (node_name, self.instance.name))
7704

    
7705
  def _CreateNewStorage(self, node_name):
7706
    vgname = self.cfg.GetVGName()
7707
    iv_names = {}
7708

    
7709
    for idx, dev in enumerate(self.instance.disks):
7710
      if idx not in self.disks:
7711
        continue
7712

    
7713
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7714

    
7715
      self.cfg.SetDiskID(dev, node_name)
7716

    
7717
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7718
      names = _GenerateUniqueNames(self.lu, lv_names)
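      # names[0]/names[1] pair up with the data/meta suffixes built above,
      # prefixed with a cluster-generated unique id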
7719

    
7720
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7721
                             logical_id=(vgname, names[0]))
7722
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7723
                             logical_id=(vgname, names[1]))
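      # (the hard-coded size=128 above is the DRBD metadata volume; disk sizes
      # in this module are expressed in MiB)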
7724

    
7725
      new_lvs = [lv_data, lv_meta]
7726
      old_lvs = dev.children
7727
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7728

    
7729
      # we pass force_create=True to force the LVM creation
7730
      for new_lv in new_lvs:
7731
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7732
                        _GetInstanceInfoText(self.instance), False)
7733

    
7734
    return iv_names
7735

    
7736
  def _CheckDevices(self, node_name, iv_names):
7737
    for name, (dev, _, _) in iv_names.iteritems():
7738
      self.cfg.SetDiskID(dev, node_name)
7739

    
7740
      result = self.rpc.call_blockdev_find(node_name, dev)
7741

    
7742
      msg = result.fail_msg
7743
      if msg or not result.payload:
7744
        if not msg:
7745
          msg = "disk not found"
7746
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7747
                                 (name, msg))
7748

    
7749
      if result.payload.is_degraded:
7750
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7751

    
7752
  def _RemoveOldStorage(self, node_name, iv_names):
7753
    for name, (_, old_lvs, _) in iv_names.iteritems():
7754
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7755

    
7756
      for lv in old_lvs:
7757
        self.cfg.SetDiskID(lv, node_name)
7758

    
7759
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7760
        if msg:
7761
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7762
                             hint="remove unused LVs manually")
7763

    
7764
  def _ReleaseNodeLock(self, node_name):
7765
    """Releases the lock for a given node."""
7766
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7767

    
7768
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7769
    """Replace a disk on the primary or secondary for DRBD 8.
7770

7771
    The algorithm for replace is quite complicated:
7772

7773
      1. for each disk to be replaced:
7774

7775
        1. create new LVs on the target node with unique names
7776
        1. detach old LVs from the drbd device
7777
        1. rename old LVs to name_replaced.<time_t>
7778
        1. rename new LVs to old LVs
7779
        1. attach the new LVs (with the old names now) to the drbd device
7780

7781
      1. wait for sync across all devices
7782

7783
      1. for each modified disk:
7784

7785
        1. remove old LVs (which have the name name_replaced.<time_t>)
7786

7787
    Failures are not very well handled.
7788

7789
    """
7790
    steps_total = 6
7791

    
7792
    # Step: check device activation
7793
    self.lu.LogStep(1, steps_total, "Check device existence")
7794
    self._CheckDisksExistence([self.other_node, self.target_node])
7795
    self._CheckVolumeGroup([self.target_node, self.other_node])
7796

    
7797
    # Step: check other node consistency
7798
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7799
    self._CheckDisksConsistency(self.other_node,
7800
                                self.other_node == self.instance.primary_node,
7801
                                False)
7802

    
7803
    # Step: create new storage
7804
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7805
    iv_names = self._CreateNewStorage(self.target_node)
7806

    
7807
    # Step: for each lv, detach+rename*2+attach
7808
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7809
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7810
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7811

    
7812
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7813
                                                     old_lvs)
7814
      result.Raise("Can't detach drbd from local storage on node"
7815
                   " %s for device %s" % (self.target_node, dev.iv_name))
7816
      #dev.children = []
7817
      #cfg.Update(instance)
7818

    
7819
      # ok, we created the new LVs, so now we know we have the needed
7820
      # storage; as such, we proceed on the target node to rename
7821
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7822
      # using the assumption that logical_id == physical_id (which in
7823
      # turn is the unique_id on that node)
7824

    
7825
      # FIXME(iustin): use a better name for the replaced LVs
7826
      temp_suffix = int(time.time())
7827
      ren_fn = lambda d, suff: (d.physical_id[0],
7828
                                d.physical_id[1] + "_replaced-%s" % suff)
7829

    
7830
      # Build the rename list based on what LVs exist on the node
7831
      rename_old_to_new = []
7832
      for to_ren in old_lvs:
7833
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7834
        if not result.fail_msg and result.payload:
7835
          # device exists
7836
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7837

    
7838
      self.lu.LogInfo("Renaming the old LVs on the target node")
7839
      result = self.rpc.call_blockdev_rename(self.target_node,
7840
                                             rename_old_to_new)
7841
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7842

    
7843
      # Now we rename the new LVs to the old LVs
7844
      self.lu.LogInfo("Renaming the new LVs on the target node")
7845
      rename_new_to_old = [(new, old.physical_id)
7846
                           for old, new in zip(old_lvs, new_lvs)]
7847
      result = self.rpc.call_blockdev_rename(self.target_node,
7848
                                             rename_new_to_old)
7849
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7850

    
7851
      for old, new in zip(old_lvs, new_lvs):
7852
        new.logical_id = old.logical_id
7853
        self.cfg.SetDiskID(new, self.target_node)
7854

    
7855
      for disk in old_lvs:
7856
        disk.logical_id = ren_fn(disk, temp_suffix)
7857
        self.cfg.SetDiskID(disk, self.target_node)
7858

    
7859
      # Now that the new lvs have the old name, we can add them to the device
7860
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7861
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7862
                                                  new_lvs)
7863
      msg = result.fail_msg
7864
      if msg:
7865
        for new_lv in new_lvs:
7866
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7867
                                               new_lv).fail_msg
7868
          if msg2:
7869
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7870
                               hint=("cleanup manually the unused logical"
7871
                                     "volumes"))
7872
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7873

    
7874
      dev.children = new_lvs
7875

    
7876
      self.cfg.Update(self.instance, feedback_fn)
7877

    
7878
    cstep = 5
7879
    if self.early_release:
7880
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7881
      cstep += 1
7882
      self._RemoveOldStorage(self.target_node, iv_names)
7883
      # WARNING: we release both node locks here, do not do other RPCs
7884
      # than WaitForSync to the primary node
7885
      self._ReleaseNodeLock([self.target_node, self.other_node])
7886

    
7887
    # Wait for sync
7888
    # This can fail as the old devices are degraded and _WaitForSync
7889
    # does a combined result over all disks, so we don't check its return value
7890
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7891
    cstep += 1
7892
    _WaitForSync(self.lu, self.instance)
7893

    
7894
    # Check all devices manually
7895
    self._CheckDevices(self.instance.primary_node, iv_names)
7896

    
7897
    # Step: remove old storage
7898
    if not self.early_release:
7899
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7900
      cstep += 1
7901
      self._RemoveOldStorage(self.target_node, iv_names)
7902

    
7903
  def _ExecDrbd8Secondary(self, feedback_fn):
7904
    """Replace the secondary node for DRBD 8.
7905

7906
    The algorithm for replace is quite complicated:
7907
      - for all disks of the instance:
7908
        - create new LVs on the new node with same names
7909
        - shutdown the drbd device on the old secondary
7910
        - disconnect the drbd network on the primary
7911
        - create the drbd device on the new secondary
7912
        - network attach the drbd on the primary, using an artifice:
7913
          the drbd code for Attach() will connect to the network if it
7914
          finds a device which is connected to the good local disks but
7915
          not network enabled
7916
      - wait for sync across all devices
7917
      - remove all disks from the old secondary
7918

7919
    Failures are not very well handled.
7920

7921
    """
7922
    steps_total = 6
7923

    
7924
    # Step: check device activation
7925
    self.lu.LogStep(1, steps_total, "Check device existence")
7926
    self._CheckDisksExistence([self.instance.primary_node])
7927
    self._CheckVolumeGroup([self.instance.primary_node])
7928

    
7929
    # Step: check other node consistency
7930
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7931
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7932

    
7933
    # Step: create new storage
7934
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7935
    for idx, dev in enumerate(self.instance.disks):
7936
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7937
                      (self.new_node, idx))
7938
      # we pass force_create=True to force LVM creation
7939
      for new_lv in dev.children:
7940
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7941
                        _GetInstanceInfoText(self.instance), False)
7942

    
7943
    # Step 4: drbd minors and drbd setup changes
7944
    # after this, we must manually remove the drbd minors on both the
7945
    # error and the success paths
7946
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7947
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7948
                                         for dev in self.instance.disks],
7949
                                        self.instance.name)
7950
    logging.debug("Allocated minors %r", minors)
7951

    
7952
    iv_names = {}
7953
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7954
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7955
                      (self.new_node, idx))
7956
      # create new devices on new_node; note that we create two IDs:
7957
      # one without port, so the drbd will be activated without
7958
      # networking information on the new node at this stage, and one
7959
      # with network, for the latter activation in step 4
7960
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7961
      if self.instance.primary_node == o_node1:
7962
        p_minor = o_minor1
7963
      else:
7964
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7965
        p_minor = o_minor2
7966

    
7967
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7968
                      p_minor, new_minor, o_secret)
7969
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7970
                    p_minor, new_minor, o_secret)
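      # DRBD8 logical_id layout, as unpacked above:
      #   (nodeA, nodeB, port, minorA, minorB, secret)
      # new_alone_id deliberately carries port=None so the device is first
      # brought up standalone on the new node; the later network attach uses
      # new_net_id, which keeps the original port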
7971

    
7972
      iv_names[idx] = (dev, dev.children, new_net_id)
7973
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7974
                    new_net_id)
7975
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7976
                              logical_id=new_alone_id,
7977
                              children=dev.children,
7978
                              size=dev.size)
7979
      try:
7980
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7981
                              _GetInstanceInfoText(self.instance), False)
7982
      except errors.GenericError:
7983
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7984
        raise
7985

    
7986
    # We have new devices, shutdown the drbd on the old secondary
7987
    for idx, dev in enumerate(self.instance.disks):
7988
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7989
      self.cfg.SetDiskID(dev, self.target_node)
7990
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7991
      if msg:
7992
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7993
                           "node: %s" % (idx, msg),
7994
                           hint=("Please cleanup this device manually as"
7995
                                 " soon as possible"))
7996

    
7997
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7998
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7999
                                               self.node_secondary_ip,
8000
                                               self.instance.disks)\
8001
                                              [self.instance.primary_node]
8002

    
8003
    msg = result.fail_msg
8004
    if msg:
8005
      # detaches didn't succeed (unlikely)
8006
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8007
      raise errors.OpExecError("Can't detach the disks from the network on"
8008
                               " old node: %s" % (msg,))
8009

    
8010
    # if we managed to detach at least one, we update all the disks of
8011
    # the instance to point to the new secondary
8012
    self.lu.LogInfo("Updating instance configuration")
8013
    for dev, _, new_logical_id in iv_names.itervalues():
8014
      dev.logical_id = new_logical_id
8015
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8016

    
8017
    self.cfg.Update(self.instance, feedback_fn)
8018

    
8019
    # and now perform the drbd attach
8020
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8021
                    " (standalone => connected)")
8022
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8023
                                            self.new_node],
8024
                                           self.node_secondary_ip,
8025
                                           self.instance.disks,
8026
                                           self.instance.name,
8027
                                           False)
8028
    for to_node, to_result in result.items():
8029
      msg = to_result.fail_msg
8030
      if msg:
8031
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8032
                           to_node, msg,
8033
                           hint=("please do a gnt-instance info to see the"
8034
                                 " status of disks"))
8035
    cstep = 5
8036
    if self.early_release:
8037
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8038
      cstep += 1
8039
      self._RemoveOldStorage(self.target_node, iv_names)
8040
      # WARNING: we release all node locks here, do not do other RPCs
8041
      # than WaitForSync to the primary node
8042
      self._ReleaseNodeLock([self.instance.primary_node,
8043
                             self.target_node,
8044
                             self.new_node])
8045

    
8046
    # Wait for sync
8047
    # This can fail as the old devices are degraded and _WaitForSync
8048
    # does a combined result over all disks, so we don't check its return value
8049
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8050
    cstep += 1
8051
    _WaitForSync(self.lu, self.instance)
8052

    
8053
    # Check all devices manually
8054
    self._CheckDevices(self.instance.primary_node, iv_names)
8055

    
8056
    # Step: remove old storage
8057
    if not self.early_release:
8058
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8059
      self._RemoveOldStorage(self.target_node, iv_names)
8060

    
8061

    
8062
class LURepairNodeStorage(NoHooksLU):
8063
  """Repairs the volume group on a node.
8064

8065
  """
8066
  _OP_REQP = ["node_name"]
8067
  REQ_BGL = False
8068

    
8069
  def CheckArguments(self):
8070
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8071

    
8072
    _CheckStorageType(self.op.storage_type)
8073

    
8074
  def ExpandNames(self):
8075
    self.needed_locks = {
8076
      locking.LEVEL_NODE: [self.op.node_name],
8077
      }
8078

    
8079
  def _CheckFaultyDisks(self, instance, node_name):
8080
    """Ensure faulty disks abort the opcode or at least warn."""
8081
    try:
8082
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8083
                                  node_name, True):
8084
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8085
                                   " node '%s'" % (instance.name, node_name),
8086
                                   errors.ECODE_STATE)
8087
    except errors.OpPrereqError, err:
8088
      if self.op.ignore_consistency:
8089
        self.proc.LogWarning(str(err.args[0]))
8090
      else:
8091
        raise
8092

    
8093
  def CheckPrereq(self):
8094
    """Check prerequisites.
8095

8096
    """
8097
    storage_type = self.op.storage_type
8098

    
8099
    if (constants.SO_FIX_CONSISTENCY not in
8100
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8101
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8102
                                 " repaired" % storage_type,
8103
                                 errors.ECODE_INVAL)
8104

    
8105
    # Check whether any instance on this node has faulty disks
8106
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8107
      if not inst.admin_up:
8108
        continue
8109
      check_nodes = set(inst.all_nodes)
8110
      check_nodes.discard(self.op.node_name)
8111
      for inst_node_name in check_nodes:
8112
        self._CheckFaultyDisks(inst, inst_node_name)
8113

    
8114
  def Exec(self, feedback_fn):
8115
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8116
                (self.op.name, self.op.node_name))
8117

    
8118
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8119
    result = self.rpc.call_storage_execute(self.op.node_name,
8120
                                           self.op.storage_type, st_args,
8121
                                           self.op.name,
8122
                                           constants.SO_FIX_CONSISTENCY)
8123
    result.Raise("Failed to repair storage unit '%s' on %s" %
8124
                 (self.op.name, self.op.node_name))
8125

    
8126

    
8127
class LUNodeEvacuationStrategy(NoHooksLU):
8128
  """Computes the node evacuation strategy.
8129

8130
  """
8131
  _OP_REQP = ["nodes"]
8132
  REQ_BGL = False
8133

    
8134
  def CheckArguments(self):
8135
    if not hasattr(self.op, "remote_node"):
8136
      self.op.remote_node = None
8137
    if not hasattr(self.op, "iallocator"):
8138
      self.op.iallocator = None
8139
    if self.op.remote_node is not None and self.op.iallocator is not None:
8140
      raise errors.OpPrereqError("Give either the iallocator or the new"
8141
                                 " secondary, not both", errors.ECODE_INVAL)
8142

    
8143
  def ExpandNames(self):
8144
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8145
    self.needed_locks = locks = {}
8146
    if self.op.remote_node is None:
8147
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8148
    else:
8149
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8150
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8151

    
8152
  def CheckPrereq(self):
8153
    pass
8154

    
8155
  def Exec(self, feedback_fn):
8156
    if self.op.remote_node is not None:
8157
      instances = []
8158
      for node in self.op.nodes:
8159
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8160
      result = []
8161
      for i in instances:
8162
        if i.primary_node == self.op.remote_node:
8163
          raise errors.OpPrereqError("Node %s is the primary node of"
8164
                                     " instance %s, cannot use it as"
8165
                                     " secondary" %
8166
                                     (self.op.remote_node, i.name),
8167
                                     errors.ECODE_INVAL)
8168
        result.append([i.name, self.op.remote_node])
8169
    else:
8170
      ial = IAllocator(self.cfg, self.rpc,
8171
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8172
                       evac_nodes=self.op.nodes)
8173
      ial.Run(self.op.iallocator, validate=True)
8174
      if not ial.success:
8175
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8176
                                 errors.ECODE_NORES)
8177
      result = ial.result
8178
    return result
8179

    
8180

    
8181
class LUGrowDisk(LogicalUnit):
8182
  """Grow a disk of an instance.
8183

8184
  """
8185
  HPATH = "disk-grow"
8186
  HTYPE = constants.HTYPE_INSTANCE
8187
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8188
  REQ_BGL = False
8189

    
8190
  def ExpandNames(self):
8191
    self._ExpandAndLockInstance()
8192
    self.needed_locks[locking.LEVEL_NODE] = []
8193
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8194

    
8195
  def DeclareLocks(self, level):
8196
    if level == locking.LEVEL_NODE:
8197
      self._LockInstancesNodes()
8198

    
8199
  def BuildHooksEnv(self):
8200
    """Build hooks env.
8201

8202
    This runs on the master, the primary and all the secondaries.
8203

8204
    """
8205
    env = {
8206
      "DISK": self.op.disk,
8207
      "AMOUNT": self.op.amount,
8208
      }
8209
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8210
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8211
    return env, nl, nl
8212

    
8213
  def CheckPrereq(self):
8214
    """Check prerequisites.
8215

8216
    This checks that the instance is in the cluster.
8217

8218
    """
8219
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8220
    assert instance is not None, \
8221
      "Cannot retrieve locked instance %s" % self.op.instance_name
8222
    nodenames = list(instance.all_nodes)
8223
    for node in nodenames:
8224
      _CheckNodeOnline(self, node)
8225

    
8226

    
8227
    self.instance = instance
8228

    
8229
    if instance.disk_template not in constants.DTS_GROWABLE:
8230
      raise errors.OpPrereqError("Instance's disk layout does not support"
8231
                                 " growing.", errors.ECODE_INVAL)
8232

    
8233
    self.disk = instance.FindDisk(self.op.disk)
8234

    
8235
    if instance.disk_template != constants.DT_FILE:
8236
      # TODO: check the free disk space for file-based disks, once that
8237
      # feature is supported
8238
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
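      # (the growth amount must fit on every node that holds a copy of the
      # disk, hence the check over all of the instance's nodes)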
8239

    
8240
  def Exec(self, feedback_fn):
8241
    """Execute disk grow.
8242

8243
    """
8244
    instance = self.instance
8245
    disk = self.disk
8246

    
8247
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8248
    if not disks_ok:
8249
      raise errors.OpExecError("Cannot activate block device to grow")
8250

    
8251
    for node in instance.all_nodes:
8252
      self.cfg.SetDiskID(disk, node)
8253
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8254
      result.Raise("Grow request failed to node %s" % node)
8255

    
8256
      # TODO: Rewrite code to work properly
8257
      # DRBD goes into sync mode for a short amount of time after executing the
8258
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8259
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8260
      # time is a work-around.
8261
      time.sleep(5)
8262

    
8263
    disk.RecordGrow(self.op.amount)
8264
    self.cfg.Update(instance, feedback_fn)
8265
    if self.op.wait_for_sync:
8266
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8267
      if disk_abort:
8268
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8269
                             " status.\nPlease check the instance.")
8270
      if not instance.admin_up:
8271
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8272
    elif not instance.admin_up:
8273
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8274
                           " not supposed to be running because no wait for"
8275
                           " sync mode was requested.")
8276

    
8277

    
8278
class LUQueryInstanceData(NoHooksLU):
8279
  """Query runtime instance data.
8280

8281
  """
8282
  _OP_REQP = ["instances", "static"]
8283
  REQ_BGL = False
8284

    
8285
  def ExpandNames(self):
8286
    self.needed_locks = {}
8287
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8288

    
8289
    if not isinstance(self.op.instances, list):
8290
      raise errors.OpPrereqError("Invalid argument type 'instances'",
8291
                                 errors.ECODE_INVAL)
8292

    
8293
    if self.op.instances:
8294
      self.wanted_names = []
8295
      for name in self.op.instances:
8296
        full_name = _ExpandInstanceName(self.cfg, name)
8297
        self.wanted_names.append(full_name)
8298
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8299
    else:
8300
      self.wanted_names = None
8301
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8302

    
8303
    self.needed_locks[locking.LEVEL_NODE] = []
8304
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8305

    
8306
  def DeclareLocks(self, level):
8307
    if level == locking.LEVEL_NODE:
8308
      self._LockInstancesNodes()
8309

    
8310
  def CheckPrereq(self):
8311
    """Check prerequisites.
8312

8313
    This only checks the optional instance list against the existing names.
8314

8315
    """
8316
    if self.wanted_names is None:
8317
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8318

    
8319
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8320
                             in self.wanted_names]
8321
    return
8322

    
8323
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8324
    """Returns the status of a block device
8325

8326
    """
8327
    if self.op.static or not node:
8328
      return None
8329

    
8330
    self.cfg.SetDiskID(dev, node)
8331

    
8332
    result = self.rpc.call_blockdev_find(node, dev)
8333
    if result.offline:
8334
      return None
8335

    
8336
    result.Raise("Can't compute disk status for %s" % instance_name)
8337

    
8338
    status = result.payload
8339
    if status is None:
8340
      return None
8341

    
8342
    return (status.dev_path, status.major, status.minor,
8343
            status.sync_percent, status.estimated_time,
8344
            status.is_degraded, status.ldisk_status)
8345

    
8346
  def _ComputeDiskStatus(self, instance, snode, dev):
8347
    """Compute block device status.
8348

8349
    """
8350
    if dev.dev_type in constants.LDS_DRBD:
8351
      # we change the snode then (otherwise we use the one passed in)
8352
      if dev.logical_id[0] == instance.primary_node:
8353
        snode = dev.logical_id[1]
8354
      else:
8355
        snode = dev.logical_id[0]
8356

    
8357
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8358
                                              instance.name, dev)
8359
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8360

    
8361
    if dev.children:
8362
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8363
                      for child in dev.children]
8364
    else:
8365
      dev_children = []
8366

    
8367
    data = {
8368
      "iv_name": dev.iv_name,
8369
      "dev_type": dev.dev_type,
8370
      "logical_id": dev.logical_id,
8371
      "physical_id": dev.physical_id,
8372
      "pstatus": dev_pstatus,
8373
      "sstatus": dev_sstatus,
8374
      "children": dev_children,
8375
      "mode": dev.mode,
8376
      "size": dev.size,
8377
      }
8378

    
8379
    return data
8380

    
8381
  def Exec(self, feedback_fn):
8382
    """Gather and return data"""
8383
    result = {}
8384

    
8385
    cluster = self.cfg.GetClusterInfo()
8386

    
8387
    for instance in self.wanted_instances:
8388
      if not self.op.static:
8389
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8390
                                                  instance.name,
8391
                                                  instance.hypervisor)
8392
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8393
        remote_info = remote_info.payload
8394
        if remote_info and "state" in remote_info:
8395
          remote_state = "up"
8396
        else:
8397
          remote_state = "down"
8398
      else:
8399
        remote_state = None
8400
      if instance.admin_up:
8401
        config_state = "up"
8402
      else:
8403
        config_state = "down"
8404

    
8405
      disks = [self._ComputeDiskStatus(instance, None, device)
8406
               for device in instance.disks]
8407

    
8408
      idict = {
8409
        "name": instance.name,
8410
        "config_state": config_state,
8411
        "run_state": remote_state,
8412
        "pnode": instance.primary_node,
8413
        "snodes": instance.secondary_nodes,
8414
        "os": instance.os,
8415
        # this happens to be the same format used for hooks
8416
        "nics": _NICListToTuple(self, instance.nics),
8417
        "disk_template": instance.disk_template,
8418
        "disks": disks,
8419
        "hypervisor": instance.hypervisor,
8420
        "network_port": instance.network_port,
8421
        "hv_instance": instance.hvparams,
8422
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8423
        "be_instance": instance.beparams,
8424
        "be_actual": cluster.FillBE(instance),
8425
        "os_instance": instance.osparams,
8426
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8427
        "serial_no": instance.serial_no,
8428
        "mtime": instance.mtime,
8429
        "ctime": instance.ctime,
8430
        "uuid": instance.uuid,
8431
        }
8432

    
8433
      result[instance.name] = idict
8434

    
8435
    return result
8436

    
8437

    
8438
class LUSetInstanceParams(LogicalUnit):
8439
  """Modifies an instances's parameters.
8440

8441
  """
8442
  HPATH = "instance-modify"
8443
  HTYPE = constants.HTYPE_INSTANCE
8444
  _OP_REQP = ["instance_name"]
8445
  REQ_BGL = False
8446

    
8447
  def CheckArguments(self):
8448
    if not hasattr(self.op, 'nics'):
8449
      self.op.nics = []
8450
    if not hasattr(self.op, 'disks'):
8451
      self.op.disks = []
8452
    if not hasattr(self.op, 'beparams'):
8453
      self.op.beparams = {}
8454
    if not hasattr(self.op, 'hvparams'):
8455
      self.op.hvparams = {}
8456
    if not hasattr(self.op, "disk_template"):
8457
      self.op.disk_template = None
8458
    if not hasattr(self.op, "remote_node"):
8459
      self.op.remote_node = None
8460
    if not hasattr(self.op, "os_name"):
8461
      self.op.os_name = None
8462
    if not hasattr(self.op, "force_variant"):
8463
      self.op.force_variant = False
8464
    if not hasattr(self.op, "osparams"):
8465
      self.op.osparams = None
8466
    self.op.force = getattr(self.op, "force", False)
8467
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8468
            self.op.hvparams or self.op.beparams or self.op.os_name):
8469
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8470

    
8471
    if self.op.hvparams:
8472
      _CheckGlobalHvParams(self.op.hvparams)
8473

    
8474
    # Disk validation
8475
    disk_addremove = 0
8476
    for disk_op, disk_dict in self.op.disks:
8477
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8478
      if disk_op == constants.DDM_REMOVE:
8479
        disk_addremove += 1
8480
        continue
8481
      elif disk_op == constants.DDM_ADD:
8482
        disk_addremove += 1
8483
      else:
8484
        if not isinstance(disk_op, int):
8485
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8486
        if not isinstance(disk_dict, dict):
8487
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8488
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8489

    
8490
      if disk_op == constants.DDM_ADD:
8491
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8492
        if mode not in constants.DISK_ACCESS_SET:
8493
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8494
                                     errors.ECODE_INVAL)
8495
        size = disk_dict.get('size', None)
8496
        if size is None:
8497
          raise errors.OpPrereqError("Required disk parameter size missing",
8498
                                     errors.ECODE_INVAL)
8499
        try:
8500
          size = int(size)
8501
        except (TypeError, ValueError), err:
8502
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8503
                                     str(err), errors.ECODE_INVAL)
8504
        disk_dict['size'] = size
8505
      else:
8506
        # modification of disk
8507
        if 'size' in disk_dict:
8508
          raise errors.OpPrereqError("Disk size change not possible, use"
8509
                                     " grow-disk", errors.ECODE_INVAL)
8510

    
8511
    if disk_addremove > 1:
8512
      raise errors.OpPrereqError("Only one disk add or remove operation"
8513
                                 " supported at a time", errors.ECODE_INVAL)
8514

    
8515
    if self.op.disks and self.op.disk_template is not None:
8516
      raise errors.OpPrereqError("Disk template conversion and other disk"
8517
                                 " changes not supported at the same time",
8518
                                 errors.ECODE_INVAL)
8519

    
8520
    if self.op.disk_template:
8521
      _CheckDiskTemplate(self.op.disk_template)
8522
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8523
          self.op.remote_node is None):
8524
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8525
                                   " one requires specifying a secondary node",
8526
                                   errors.ECODE_INVAL)
8527

    
8528
    # NIC validation
8529
    nic_addremove = 0
8530
    for nic_op, nic_dict in self.op.nics:
8531
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8532
      if nic_op == constants.DDM_REMOVE:
8533
        nic_addremove += 1
8534
        continue
8535
      elif nic_op == constants.DDM_ADD:
8536
        nic_addremove += 1
8537
      else:
8538
        if not isinstance(nic_op, int):
8539
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8540
        if not isinstance(nic_dict, dict):
8541
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8542
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8543

    
8544
      # nic_dict should be a dict
8545
      nic_ip = nic_dict.get('ip', None)
8546
      if nic_ip is not None:
8547
        if nic_ip.lower() == constants.VALUE_NONE:
8548
          nic_dict['ip'] = None
8549
        else:
8550
          if not utils.IsValidIP(nic_ip):
8551
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8552
                                       errors.ECODE_INVAL)
8553

    
8554
      nic_bridge = nic_dict.get('bridge', None)
8555
      nic_link = nic_dict.get('link', None)
8556
      if nic_bridge and nic_link:
8557
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8558
                                   " at the same time", errors.ECODE_INVAL)
8559
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8560
        nic_dict['bridge'] = None
8561
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8562
        nic_dict['link'] = None
8563

    
8564
      if nic_op == constants.DDM_ADD:
8565
        nic_mac = nic_dict.get('mac', None)
8566
        if nic_mac is None:
8567
          nic_dict['mac'] = constants.VALUE_AUTO
8568

    
8569
      if 'mac' in nic_dict:
8570
        nic_mac = nic_dict['mac']
8571
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8572
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8573

    
8574
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8575
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8576
                                     " modifying an existing nic",
8577
                                     errors.ECODE_INVAL)
8578

    
8579
    if nic_addremove > 1:
8580
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8581
                                 " supported at a time", errors.ECODE_INVAL)
8582

    
8583
  def ExpandNames(self):
8584
    self._ExpandAndLockInstance()
8585
    self.needed_locks[locking.LEVEL_NODE] = []
8586
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8587

    
8588
  def DeclareLocks(self, level):
8589
    if level == locking.LEVEL_NODE:
8590
      self._LockInstancesNodes()
8591
      if self.op.disk_template and self.op.remote_node:
8592
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8593
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8594

    
8595
  def BuildHooksEnv(self):
8596
    """Build hooks env.
8597

8598
    This runs on the master, primary and secondaries.
8599

8600
    """
8601
    args = dict()
8602
    if constants.BE_MEMORY in self.be_new:
8603
      args['memory'] = self.be_new[constants.BE_MEMORY]
8604
    if constants.BE_VCPUS in self.be_new:
8605
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8606
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8607
    # information at all.
8608
    if self.op.nics:
8609
      args['nics'] = []
8610
      nic_override = dict(self.op.nics)
8611
      for idx, nic in enumerate(self.instance.nics):
8612
        if idx in nic_override:
8613
          this_nic_override = nic_override[idx]
8614
        else:
8615
          this_nic_override = {}
8616
        if 'ip' in this_nic_override:
8617
          ip = this_nic_override['ip']
8618
        else:
8619
          ip = nic.ip
8620
        if 'mac' in this_nic_override:
8621
          mac = this_nic_override['mac']
8622
        else:
8623
          mac = nic.mac
8624
        if idx in self.nic_pnew:
8625
          nicparams = self.nic_pnew[idx]
8626
        else:
8627
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8628
        mode = nicparams[constants.NIC_MODE]
8629
        link = nicparams[constants.NIC_LINK]
8630
        args['nics'].append((ip, mac, mode, link))
8631
      if constants.DDM_ADD in nic_override:
8632
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8633
        mac = nic_override[constants.DDM_ADD]['mac']
8634
        nicparams = self.nic_pnew[constants.DDM_ADD]
8635
        mode = nicparams[constants.NIC_MODE]
8636
        link = nicparams[constants.NIC_LINK]
8637
        args['nics'].append((ip, mac, mode, link))
8638
      elif constants.DDM_REMOVE in nic_override:
8639
        del args['nics'][-1]
8640

    
8641
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8642
    if self.op.disk_template:
8643
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8644
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8645
    return env, nl, nl
8646

    
8647
  def CheckPrereq(self):
8648
    """Check prerequisites.
8649

8650
    This only checks the instance list against the existing names.
8651

8652
    """
8653
    self.force = self.op.force
8654

    
8655
    # checking the new params on the primary/secondary nodes
8656

    
8657
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8658
    cluster = self.cluster = self.cfg.GetClusterInfo()
8659
    assert self.instance is not None, \
8660
      "Cannot retrieve locked instance %s" % self.op.instance_name
8661
    pnode = instance.primary_node
8662
    nodelist = list(instance.all_nodes)
8663

    
8664
    # OS change
8665
    if self.op.os_name and not self.op.force:
8666
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8667
                      self.op.force_variant)
8668
      instance_os = self.op.os_name
8669
    else:
8670
      instance_os = instance.os
8671

    
8672
    if self.op.disk_template:
8673
      if instance.disk_template == self.op.disk_template:
8674
        raise errors.OpPrereqError("Instance already has disk template %s" %
8675
                                   instance.disk_template, errors.ECODE_INVAL)
8676

    
8677
      if (instance.disk_template,
8678
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8679
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8680
                                   " %s to %s" % (instance.disk_template,
8681
                                                  self.op.disk_template),
8682
                                   errors.ECODE_INVAL)
8683
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8684
        _CheckNodeOnline(self, self.op.remote_node)
8685
        _CheckNodeNotDrained(self, self.op.remote_node)
8686
        disks = [{"size": d.size} for d in instance.disks]
8687
        required = _ComputeDiskSize(self.op.disk_template, disks)
8688
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8689
        _CheckInstanceDown(self, instance, "cannot change disk template")
8690

    
8691
    # hvparams processing
8692
    if self.op.hvparams:
8693
      hv_type = instance.hypervisor
8694
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8695
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8696
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8697

    
8698
      # local check
8699
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8700
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8701
      self.hv_new = hv_new # the new actual values
8702
      self.hv_inst = i_hvdict # the new dict (without defaults)
8703
    else:
8704
      self.hv_new = self.hv_inst = {}
8705

    
8706
    # beparams processing
8707
    if self.op.beparams:
8708
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8709
                                   use_none=True)
8710
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8711
      be_new = cluster.SimpleFillBE(i_bedict)
8712
      self.be_new = be_new # the new actual values
8713
      self.be_inst = i_bedict # the new dict (without defaults)
8714
    else:
8715
      self.be_new = self.be_inst = {}
8716

    
8717
    # osparams processing
8718
    if self.op.osparams:
8719
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8720
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8721
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8722
      self.os_inst = i_osdict # the new dict (without defaults)
8723
    else:
8724
      self.os_new = self.os_inst = {}
8725

    
8726
    self.warn = []
8727

    
8728
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8729
      mem_check_list = [pnode]
8730
      if be_new[constants.BE_AUTO_BALANCE]:
8731
        # either we changed auto_balance to yes or it was from before
8732
        mem_check_list.extend(instance.secondary_nodes)
8733
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8734
                                                  instance.hypervisor)
8735
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8736
                                         instance.hypervisor)
8737
      pninfo = nodeinfo[pnode]
8738
      msg = pninfo.fail_msg
8739
      if msg:
8740
        # Assume the primary node is unreachable and go ahead
8741
        self.warn.append("Can't get info from primary node %s: %s" %
8742
                         (pnode,  msg))
8743
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8744
        self.warn.append("Node data from primary node %s doesn't contain"
8745
                         " free memory information" % pnode)
8746
      elif instance_info.fail_msg:
8747
        self.warn.append("Can't get instance runtime information: %s" %
8748
                        instance_info.fail_msg)
8749
      else:
8750
        if instance_info.payload:
8751
          current_mem = int(instance_info.payload['memory'])
8752
        else:
8753
          # Assume instance not running
8754
          # (there is a slight race condition here, but it's not very probable,
8755
          # and we have no other way to check)
8756
          current_mem = 0
8757
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8758
                    pninfo.payload['memory_free'])
8759
        if miss_mem > 0:
8760
          raise errors.OpPrereqError("This change will prevent the instance"
8761
                                     " from starting, due to %d MB of memory"
8762
                                     " missing on its primary node" % miss_mem,
8763
                                     errors.ECODE_NORES)
8764

    
8765
      if be_new[constants.BE_AUTO_BALANCE]:
8766
        for node, nres in nodeinfo.items():
8767
          if node not in instance.secondary_nodes:
8768
            continue
8769
          msg = nres.fail_msg
8770
          if msg:
8771
            self.warn.append("Can't get info from secondary node %s: %s" %
8772
                             (node, msg))
8773
          elif not isinstance(nres.payload.get('memory_free', None), int):
8774
            self.warn.append("Secondary node %s didn't return free"
8775
                             " memory information" % node)
8776
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8777
            self.warn.append("Not enough memory to failover instance to"
8778
                             " secondary node %s" % node)
8779

    
8780
    # NIC processing
8781
    self.nic_pnew = {}
8782
    self.nic_pinst = {}
8783
    for nic_op, nic_dict in self.op.nics:
8784
      if nic_op == constants.DDM_REMOVE:
8785
        if not instance.nics:
8786
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8787
                                     errors.ECODE_INVAL)
8788
        continue
8789
      if nic_op != constants.DDM_ADD:
8790
        # an existing nic
8791
        if not instance.nics:
8792
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8793
                                     " no NICs" % nic_op,
8794
                                     errors.ECODE_INVAL)
8795
        if nic_op < 0 or nic_op >= len(instance.nics):
8796
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8797
                                     " are 0 to %d" %
8798
                                     (nic_op, len(instance.nics) - 1),
8799
                                     errors.ECODE_INVAL)
8800
        old_nic_params = instance.nics[nic_op].nicparams
8801
        old_nic_ip = instance.nics[nic_op].ip
8802
      else:
8803
        old_nic_params = {}
8804
        old_nic_ip = None
8805

    
8806
      update_params_dict = dict([(key, nic_dict[key])
8807
                                 for key in constants.NICS_PARAMETERS
8808
                                 if key in nic_dict])
8809

    
8810
      if 'bridge' in nic_dict:
8811
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8812

    
8813
      new_nic_params = _GetUpdatedParams(old_nic_params,
8814
                                         update_params_dict)
8815
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8816
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8817
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8818
      self.nic_pinst[nic_op] = new_nic_params
8819
      self.nic_pnew[nic_op] = new_filled_nic_params
8820
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8821

    
8822
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8823
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8824
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8825
        if msg:
8826
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8827
          if self.force:
8828
            self.warn.append(msg)
8829
          else:
8830
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8831
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8832
        if 'ip' in nic_dict:
8833
          nic_ip = nic_dict['ip']
8834
        else:
8835
          nic_ip = old_nic_ip
8836
        if nic_ip is None:
8837
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8838
                                     ' on a routed nic', errors.ECODE_INVAL)
8839
      if 'mac' in nic_dict:
8840
        nic_mac = nic_dict['mac']
8841
        if nic_mac is None:
8842
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8843
                                     errors.ECODE_INVAL)
8844
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8845
          # otherwise generate the mac
8846
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8847
        else:
8848
          # or validate/reserve the current one
8849
          try:
8850
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8851
          except errors.ReservationError:
8852
            raise errors.OpPrereqError("MAC address %s already in use"
8853
                                       " in cluster" % nic_mac,
8854
                                       errors.ECODE_NOTUNIQUE)
8855

    
8856
    # DISK processing
8857
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8858
      raise errors.OpPrereqError("Disk operations not supported for"
8859
                                 " diskless instances",
8860
                                 errors.ECODE_INVAL)
8861
    for disk_op, _ in self.op.disks:
8862
      if disk_op == constants.DDM_REMOVE:
8863
        if len(instance.disks) == 1:
8864
          raise errors.OpPrereqError("Cannot remove the last disk of"
8865
                                     " an instance", errors.ECODE_INVAL)
8866
        _CheckInstanceDown(self, instance, "cannot remove disks")
8867

    
8868
      if (disk_op == constants.DDM_ADD and
8869
          len(instance.nics) >= constants.MAX_DISKS):
8870
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8871
                                   " add more" % constants.MAX_DISKS,
8872
                                   errors.ECODE_STATE)
8873
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8874
        # an existing disk
8875
        if disk_op < 0 or disk_op >= len(instance.disks):
8876
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8877
                                     " are 0 to %d" %
8878
                                     (disk_op, len(instance.disks)),
8879
                                     errors.ECODE_INVAL)
8880

    
8881
    return
8882

    
8883
  def _ConvertPlainToDrbd(self, feedback_fn):
8884
    """Converts an instance from plain to drbd.
8885

8886
    """
8887
    feedback_fn("Converting template to drbd")
8888
    instance = self.instance
8889
    pnode = instance.primary_node
8890
    snode = self.op.remote_node
8891

    
8892
    # create a fake disk info for _GenerateDiskTemplate
8893
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8894
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8895
                                      instance.name, pnode, [snode],
8896
                                      disk_info, None, None, 0)
8897
    info = _GetInstanceInfoText(instance)
8898
    feedback_fn("Creating aditional volumes...")
8899
    # first, create the missing data and meta devices
8900
    for disk in new_disks:
8901
      # unfortunately this is... not too nice
8902
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8903
                            info, True)
8904
      for child in disk.children:
8905
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8906
    # at this stage, all new LVs have been created, we can rename the
8907
    # old ones
8908
    feedback_fn("Renaming original volumes...")
8909
    rename_list = [(o, n.children[0].logical_id)
8910
                   for (o, n) in zip(instance.disks, new_disks)]
8911
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8912
    result.Raise("Failed to rename original LVs")
8913

    
8914
    feedback_fn("Initializing DRBD devices...")
8915
    # all child devices are in place, we can now create the DRBD devices
8916
    for disk in new_disks:
8917
      for node in [pnode, snode]:
8918
        f_create = node == pnode
8919
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8920

    
8921
    # at this point, the instance has been modified
8922
    instance.disk_template = constants.DT_DRBD8
8923
    instance.disks = new_disks
8924
    self.cfg.Update(instance, feedback_fn)
8925

    
8926
    # disks are created, waiting for sync
8927
    disk_abort = not _WaitForSync(self, instance)
8928
    if disk_abort:
8929
      raise errors.OpExecError("There are some degraded disks for"
8930
                               " this instance, please cleanup manually")
8931

    
8932
  def _ConvertDrbdToPlain(self, feedback_fn):
8933
    """Converts an instance from drbd to plain.
8934

8935
    """
8936
    instance = self.instance
8937
    assert len(instance.secondary_nodes) == 1
8938
    pnode = instance.primary_node
8939
    snode = instance.secondary_nodes[0]
8940
    feedback_fn("Converting template to plain")
8941

    
8942
    old_disks = instance.disks
8943
    new_disks = [d.children[0] for d in old_disks]
8944

    
8945
    # copy over size and mode
8946
    for parent, child in zip(old_disks, new_disks):
8947
      child.size = parent.size
8948
      child.mode = parent.mode
8949

    
8950
    # update instance structure
8951
    instance.disks = new_disks
8952
    instance.disk_template = constants.DT_PLAIN
8953
    self.cfg.Update(instance, feedback_fn)
8954

    
8955
    feedback_fn("Removing volumes on the secondary node...")
8956
    for disk in old_disks:
8957
      self.cfg.SetDiskID(disk, snode)
8958
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8959
      if msg:
8960
        self.LogWarning("Could not remove block device %s on node %s,"
8961
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8962

    
8963
    feedback_fn("Removing unneeded volumes on the primary node...")
8964
    for idx, disk in enumerate(old_disks):
8965
      meta = disk.children[1]
8966
      self.cfg.SetDiskID(meta, pnode)
8967
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8968
      if msg:
8969
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8970
                        " continuing anyway: %s", idx, pnode, msg)
8971

    
8972

    
8973
  def Exec(self, feedback_fn):
8974
    """Modifies an instance.
8975

8976
    All parameters take effect only at the next restart of the instance.
8977

8978
    """
8979
    # Process here the warnings from CheckPrereq, as we don't have a
8980
    # feedback_fn there.
8981
    for warn in self.warn:
8982
      feedback_fn("WARNING: %s" % warn)
8983

    
8984
    result = []
8985
    instance = self.instance
8986
    # disk changes
8987
    for disk_op, disk_dict in self.op.disks:
8988
      if disk_op == constants.DDM_REMOVE:
8989
        # remove the last disk
8990
        device = instance.disks.pop()
8991
        device_idx = len(instance.disks)
8992
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8993
          self.cfg.SetDiskID(disk, node)
8994
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8995
          if msg:
8996
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8997
                            " continuing anyway", device_idx, node, msg)
8998
        result.append(("disk/%d" % device_idx, "remove"))
8999
      elif disk_op == constants.DDM_ADD:
9000
        # add a new disk
9001
        if instance.disk_template == constants.DT_FILE:
9002
          file_driver, file_path = instance.disks[0].logical_id
9003
          file_path = os.path.dirname(file_path)
9004
        else:
9005
          file_driver = file_path = None
9006
        disk_idx_base = len(instance.disks)
9007
        new_disk = _GenerateDiskTemplate(self,
9008
                                         instance.disk_template,
9009
                                         instance.name, instance.primary_node,
9010
                                         instance.secondary_nodes,
9011
                                         [disk_dict],
9012
                                         file_path,
9013
                                         file_driver,
9014
                                         disk_idx_base)[0]
9015
        instance.disks.append(new_disk)
9016
        info = _GetInstanceInfoText(instance)
9017

    
9018
        logging.info("Creating volume %s for instance %s",
9019
                     new_disk.iv_name, instance.name)
9020
        # Note: this needs to be kept in sync with _CreateDisks
9021
        #HARDCODE
9022
        for node in instance.all_nodes:
9023
          f_create = node == instance.primary_node
9024
          try:
9025
            _CreateBlockDev(self, node, instance, new_disk,
9026
                            f_create, info, f_create)
9027
          except errors.OpExecError, err:
9028
            self.LogWarning("Failed to create volume %s (%s) on"
9029
                            " node %s: %s",
9030
                            new_disk.iv_name, new_disk, node, err)
9031
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9032
                       (new_disk.size, new_disk.mode)))
9033
      else:
9034
        # change a given disk
9035
        instance.disks[disk_op].mode = disk_dict['mode']
9036
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9037

    
9038
    if self.op.disk_template:
9039
      r_shut = _ShutdownInstanceDisks(self, instance)
9040
      if not r_shut:
9041
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9042
                                 " proceed with disk template conversion")
9043
      mode = (instance.disk_template, self.op.disk_template)
9044
      try:
9045
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9046
      except:
9047
        self.cfg.ReleaseDRBDMinors(instance.name)
9048
        raise
9049
      result.append(("disk_template", self.op.disk_template))
9050

    
9051
    # NIC changes
9052
    for nic_op, nic_dict in self.op.nics:
9053
      if nic_op == constants.DDM_REMOVE:
9054
        # remove the last nic
9055
        del instance.nics[-1]
9056
        result.append(("nic.%d" % len(instance.nics), "remove"))
9057
      elif nic_op == constants.DDM_ADD:
9058
        # mac and bridge should be set, by now
9059
        mac = nic_dict['mac']
9060
        ip = nic_dict.get('ip', None)
9061
        nicparams = self.nic_pinst[constants.DDM_ADD]
9062
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9063
        instance.nics.append(new_nic)
9064
        result.append(("nic.%d" % (len(instance.nics) - 1),
9065
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9066
                       (new_nic.mac, new_nic.ip,
9067
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9068
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9069
                       )))
9070
      else:
9071
        for key in 'mac', 'ip':
9072
          if key in nic_dict:
9073
            setattr(instance.nics[nic_op], key, nic_dict[key])
9074
        if nic_op in self.nic_pinst:
9075
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9076
        for key, val in nic_dict.iteritems():
9077
          result.append(("nic.%s/%d" % (key, nic_op), val))
9078

    
9079
    # hvparams changes
9080
    if self.op.hvparams:
9081
      instance.hvparams = self.hv_inst
9082
      for key, val in self.op.hvparams.iteritems():
9083
        result.append(("hv/%s" % key, val))
9084

    
9085
    # beparams changes
9086
    if self.op.beparams:
9087
      instance.beparams = self.be_inst
9088
      for key, val in self.op.beparams.iteritems():
9089
        result.append(("be/%s" % key, val))
9090

    
9091
    # OS change
9092
    if self.op.os_name:
9093
      instance.os = self.op.os_name
9094

    
9095
    # osparams changes
9096
    if self.op.osparams:
9097
      instance.osparams = self.os_inst
9098
      for key, val in self.op.osparams.iteritems():
9099
        result.append(("os/%s" % key, val))
9100

    
9101
    self.cfg.Update(instance, feedback_fn)
9102

    
9103
    return result
9104

    
9105
  _DISK_CONVERSIONS = {
9106
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9107
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9108
    }
9109

    
9110

    
9111
class LUQueryExports(NoHooksLU):
9112
  """Query the exports list
9113

9114
  """
9115
  _OP_REQP = ['nodes']
9116
  REQ_BGL = False
9117

    
9118
  def ExpandNames(self):
9119
    self.needed_locks = {}
9120
    self.share_locks[locking.LEVEL_NODE] = 1
9121
    if not self.op.nodes:
9122
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9123
    else:
9124
      self.needed_locks[locking.LEVEL_NODE] = \
9125
        _GetWantedNodes(self, self.op.nodes)
9126

    
9127
  def CheckPrereq(self):
9128
    """Check prerequisites.
9129

9130
    """
9131
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9132

    
9133
  def Exec(self, feedback_fn):
9134
    """Compute the list of all the exported system images.
9135

9136
    @rtype: dict
9137
    @return: a dictionary with the structure node->(export-list)
9138
        where export-list is a list of the instances exported on
9139
        that node.
9140

9141
    """
9142
    rpcresult = self.rpc.call_export_list(self.nodes)
9143
    result = {}
9144
    for node in rpcresult:
9145
      if rpcresult[node].fail_msg:
9146
        result[node] = False
9147
      else:
9148
        result[node] = rpcresult[node].payload
9149

    
9150
    return result
9151

    
9152

    
9153
class LUPrepareExport(NoHooksLU):
9154
  """Prepares an instance for an export and returns useful information.
9155

9156
  """
9157
  _OP_REQP = ["instance_name", "mode"]
9158
  REQ_BGL = False
9159

    
9160
  def CheckArguments(self):
9161
    """Check the arguments.
9162

9163
    """
9164
    if self.op.mode not in constants.EXPORT_MODES:
9165
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9166
                                 errors.ECODE_INVAL)
9167

    
9168
  def ExpandNames(self):
9169
    self._ExpandAndLockInstance()
9170

    
9171
  def CheckPrereq(self):
9172
    """Check prerequisites.
9173

9174
    """
9175
    instance_name = self.op.instance_name
9176

    
9177
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9178
    assert self.instance is not None, \
9179
          "Cannot retrieve locked instance %s" % self.op.instance_name
9180
    _CheckNodeOnline(self, self.instance.primary_node)
9181

    
9182
    self._cds = _GetClusterDomainSecret()
9183

    
9184
  def Exec(self, feedback_fn):
9185
    """Prepares an instance for an export.
9186

9187
    """
9188
    instance = self.instance
9189

    
9190
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9191
      salt = utils.GenerateSecret(8)
9192

    
9193
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9194
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9195
                                              constants.RIE_CERT_VALIDITY)
9196
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9197

    
9198
      (name, cert_pem) = result.payload
9199

    
9200
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9201
                                             cert_pem)
9202

    
9203
      return {
9204
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9205
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9206
                          salt),
9207
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9208
        }
9209

    
9210
    return None
9211

    
9212

    
9213
class LUExportInstance(LogicalUnit):
9214
  """Export an instance to an image in the cluster.
9215

9216
  """
9217
  HPATH = "instance-export"
9218
  HTYPE = constants.HTYPE_INSTANCE
9219
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
9220
  REQ_BGL = False
9221

    
9222
  def CheckArguments(self):
9223
    """Check the arguments.
9224

9225
    """
9226
    _CheckBooleanOpField(self.op, "remove_instance")
9227
    _CheckBooleanOpField(self.op, "ignore_remove_failures")
9228

    
9229
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9230
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
9231
    self.remove_instance = getattr(self.op, "remove_instance", False)
9232
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9233
                                          False)
9234
    self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9235
    self.x509_key_name = getattr(self.op, "x509_key_name", None)
9236
    self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9237

    
9238
    if self.remove_instance and not self.op.shutdown:
9239
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9240
                                 " down before")
9241

    
9242
    if self.export_mode not in constants.EXPORT_MODES:
9243
      raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9244
                                 errors.ECODE_INVAL)
9245

    
9246
    if self.export_mode == constants.EXPORT_MODE_REMOTE:
9247
      if not self.x509_key_name:
9248
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9249
                                   errors.ECODE_INVAL)
9250

    
9251
      if not self.dest_x509_ca_pem:
9252
        raise errors.OpPrereqError("Missing destination X509 CA",
9253
                                   errors.ECODE_INVAL)
9254

    
9255
  def ExpandNames(self):
9256
    self._ExpandAndLockInstance()
9257

    
9258
    # Lock all nodes for local exports
9259
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9260
      # FIXME: lock only instance primary and destination node
9261
      #
9262
      # Sad but true, for now we have do lock all nodes, as we don't know where
9263
      # the previous export might be, and in this LU we search for it and
9264
      # remove it from its current node. In the future we could fix this by:
9265
      #  - making a tasklet to search (share-lock all), then create the new one,
9266
      #    then one to remove, after
9267
      #  - removing the removal operation altogether
9268
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9269

    
9270
  def DeclareLocks(self, level):
9271
    """Last minute lock declaration."""
9272
    # All nodes are locked anyway, so nothing to do here.
9273

    
9274
  def BuildHooksEnv(self):
9275
    """Build hooks env.
9276

9277
    This will run on the master, primary node and target node.
9278

9279
    """
9280
    env = {
9281
      "EXPORT_MODE": self.export_mode,
9282
      "EXPORT_NODE": self.op.target_node,
9283
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9284
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9285
      # TODO: Generic function for boolean env variables
9286
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9287
      }
9288

    
9289
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9290

    
9291
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9292

    
9293
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9294
      nl.append(self.op.target_node)
9295

    
9296
    return env, nl, nl
9297

    
9298
  def CheckPrereq(self):
9299
    """Check prerequisites.
9300

9301
    This checks that the instance and node names are valid.
9302

9303
    """
9304
    instance_name = self.op.instance_name
9305

    
9306
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9307
    assert self.instance is not None, \
9308
          "Cannot retrieve locked instance %s" % self.op.instance_name
9309
    _CheckNodeOnline(self, self.instance.primary_node)
9310

    
9311
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9312
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9313
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9314
      assert self.dst_node is not None
9315

    
9316
      _CheckNodeOnline(self, self.dst_node.name)
9317
      _CheckNodeNotDrained(self, self.dst_node.name)
9318

    
9319
      self._cds = None
9320
      self.dest_disk_info = None
9321
      self.dest_x509_ca = None
9322

    
9323
    elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9324
      self.dst_node = None
9325

    
9326
      if len(self.op.target_node) != len(self.instance.disks):
9327
        raise errors.OpPrereqError(("Received destination information for %s"
9328
                                    " disks, but instance %s has %s disks") %
9329
                                   (len(self.op.target_node), instance_name,
9330
                                    len(self.instance.disks)),
9331
                                   errors.ECODE_INVAL)
9332

    
9333
      cds = _GetClusterDomainSecret()
9334

    
9335
      # Check X509 key name
9336
      try:
9337
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9338
      except (TypeError, ValueError), err:
9339
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9340

    
9341
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9342
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9343
                                   errors.ECODE_INVAL)
9344

    
9345
      # Load and verify CA
9346
      try:
9347
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9348
      except OpenSSL.crypto.Error, err:
9349
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9350
                                   (err, ), errors.ECODE_INVAL)
9351

    
9352
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9353
      if errcode is not None:
9354
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9355
                                   errors.ECODE_INVAL)
9356

    
9357
      self.dest_x509_ca = cert
9358

    
9359
      # Verify target information
9360
      disk_info = []
9361
      for idx, disk_data in enumerate(self.op.target_node):
9362
        try:
9363
          (host, port, magic) = \
9364
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9365
        except errors.GenericError, err:
9366
          raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9367
                                     errors.ECODE_INVAL)
9368

    
9369
        disk_info.append((host, port, magic))
9370

    
9371
      assert len(disk_info) == len(self.op.target_node)
9372
      self.dest_disk_info = disk_info
9373

    
9374
    else:
9375
      raise errors.ProgrammerError("Unhandled export mode %r" %
9376
                                   self.export_mode)
9377

    
9378
    # instance disk type verification
9379
    # TODO: Implement export support for file-based disks
9380
    for disk in self.instance.disks:
9381
      if disk.dev_type == constants.LD_FILE:
9382
        raise errors.OpPrereqError("Export not supported for instances with"
9383
                                   " file-based disks", errors.ECODE_INVAL)
9384

    
9385
  def _CleanupExports(self, feedback_fn):
9386
    """Removes exports of current instance from all other nodes.
9387

9388
    If an instance in a cluster with nodes A..D was exported to node C, its
9389
    exports will be removed from the nodes A, B and D.
9390

9391
    """
9392
    assert self.export_mode != constants.EXPORT_MODE_REMOTE
9393

    
9394
    nodelist = self.cfg.GetNodeList()
9395
    nodelist.remove(self.dst_node.name)
9396

    
9397
    # on one-node clusters nodelist will be empty after the removal
9398
    # if we proceed the backup would be removed because OpQueryExports
9399
    # substitutes an empty list with the full cluster node list.
9400
    iname = self.instance.name
9401
    if nodelist:
9402
      feedback_fn("Removing old exports for instance %s" % iname)
9403
      exportlist = self.rpc.call_export_list(nodelist)
9404
      for node in exportlist:
9405
        if exportlist[node].fail_msg:
9406
          continue
9407
        if iname in exportlist[node].payload:
9408
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9409
          if msg:
9410
            self.LogWarning("Could not remove older export for instance %s"
9411
                            " on node %s: %s", iname, node, msg)
9412

    
9413
  def Exec(self, feedback_fn):
9414
    """Export an instance to an image in the cluster.
9415

9416
    """
9417
    assert self.export_mode in constants.EXPORT_MODES
9418

    
9419
    instance = self.instance
9420
    src_node = instance.primary_node
9421

    
9422
    if self.op.shutdown:
9423
      # shutdown the instance, but not the disks
9424
      feedback_fn("Shutting down instance %s" % instance.name)
9425
      result = self.rpc.call_instance_shutdown(src_node, instance,
9426
                                               self.shutdown_timeout)
9427
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9428
      result.Raise("Could not shutdown instance %s on"
9429
                   " node %s" % (instance.name, src_node))
9430

    
9431
    # set the disks ID correctly since call_instance_start needs the
9432
    # correct drbd minor to create the symlinks
9433
    for disk in instance.disks:
9434
      self.cfg.SetDiskID(disk, src_node)
9435

    
9436
    activate_disks = (not instance.admin_up)
9437

    
9438
    if activate_disks:
9439
      # Activate the instance disks if we'exporting a stopped instance
9440
      feedback_fn("Activating disks for %s" % instance.name)
9441
      _StartInstanceDisks(self, instance, None)
9442

    
9443
    try:
9444
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9445
                                                     instance)
9446

    
9447
      helper.CreateSnapshots()
9448
      try:
9449
        if (self.op.shutdown and instance.admin_up and
9450
            not self.remove_instance):
9451
          assert not activate_disks
9452
          feedback_fn("Starting instance %s" % instance.name)
9453
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9454
          msg = result.fail_msg
9455
          if msg:
9456
            feedback_fn("Failed to start instance: %s" % msg)
9457
            _ShutdownInstanceDisks(self, instance)
9458
            raise errors.OpExecError("Could not start instance: %s" % msg)
9459

    
9460
        if self.export_mode == constants.EXPORT_MODE_LOCAL:
9461
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9462
        elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9463
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9464
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9465

    
9466
          (key_name, _, _) = self.x509_key_name
9467

    
9468
          dest_ca_pem = \
9469
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9470
                                            self.dest_x509_ca)
9471

    
9472
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9473
                                                     key_name, dest_ca_pem,
9474
                                                     timeouts)
9475
      finally:
9476
        helper.Cleanup()
9477

    
9478
      # Check for backwards compatibility
9479
      assert len(dresults) == len(instance.disks)
9480
      assert compat.all(isinstance(i, bool) for i in dresults), \
9481
             "Not all results are boolean: %r" % dresults
9482

    
9483
    finally:
9484
      if activate_disks:
9485
        feedback_fn("Deactivating disks for %s" % instance.name)
9486
        _ShutdownInstanceDisks(self, instance)
9487

    
9488
    # Remove instance if requested
9489
    if self.remove_instance:
9490
      if not (compat.all(dresults) and fin_resu):
9491
        feedback_fn("Not removing instance %s as parts of the export failed" %
9492
                    instance.name)
9493
      else:
9494
        feedback_fn("Removing instance %s" % instance.name)
9495
        _RemoveInstance(self, feedback_fn, instance,
9496
                        self.ignore_remove_failures)
9497

    
9498
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9499
      self._CleanupExports(feedback_fn)
9500

    
9501
    return fin_resu, dresults
9502

    
9503

    
9504
class LURemoveExport(NoHooksLU):
9505
  """Remove exports related to the named instance.
9506

9507
  """
9508
  _OP_REQP = ["instance_name"]
9509
  REQ_BGL = False
9510

    
9511
  def ExpandNames(self):
9512
    self.needed_locks = {}
9513
    # We need all nodes to be locked in order for RemoveExport to work, but we
9514
    # don't need to lock the instance itself, as nothing will happen to it (and
9515
    # we can remove exports also for a removed instance)
9516
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9517

    
9518
  def CheckPrereq(self):
9519
    """Check prerequisites.
9520
    """
9521
    pass
9522

    
9523
  def Exec(self, feedback_fn):
9524
    """Remove any export.
9525

9526
    """
9527
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9528
    # If the instance was not found we'll try with the name that was passed in.
9529
    # This will only work if it was an FQDN, though.
9530
    fqdn_warn = False
9531
    if not instance_name:
9532
      fqdn_warn = True
9533
      instance_name = self.op.instance_name
9534

    
9535
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9536
    exportlist = self.rpc.call_export_list(locked_nodes)
9537
    found = False
9538
    for node in exportlist:
9539
      msg = exportlist[node].fail_msg
9540
      if msg:
9541
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9542
        continue
9543
      if instance_name in exportlist[node].payload:
9544
        found = True
9545
        result = self.rpc.call_export_remove(node, instance_name)
9546
        msg = result.fail_msg
9547
        if msg:
9548
          logging.error("Could not remove export for instance %s"
9549
                        " on node %s: %s", instance_name, node, msg)
9550

    
9551
    if fqdn_warn and not found:
9552
      feedback_fn("Export not found. If trying to remove an export belonging"
9553
                  " to a deleted instance please use its Fully Qualified"
9554
                  " Domain Name.")
9555

    
9556

    
9557
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
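  # The result is a list of (path, tag) pairs for every tag matched via
  # re.search, where path is "/cluster", "/instances/<name>" or
  # "/nodes/<name>".  Illustrative sketch (the tag values are made up):
  #
  #   [("/cluster", "production"), ("/instances/web1.example.com", "prod-web")]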
  _OP_REQP = ["pattern"]
9607
  REQ_BGL = False
9608

    
9609
  def ExpandNames(self):
9610
    self.needed_locks = {}
9611

    
9612
  def CheckPrereq(self):
9613
    """Check prerequisites.
9614

9615
    This checks the pattern passed for validity by compiling it.
9616

9617
    """
9618
    try:
9619
      self.re = re.compile(self.op.pattern)
9620
    except re.error, err:
9621
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9622
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9623

    
9624
  def Exec(self, feedback_fn):
9625
    """Returns the tag list.
9626

9627
    """
9628
    cfg = self.cfg
9629
    tgts = [("/cluster", cfg.GetClusterInfo())]
9630
    ilist = cfg.GetAllInstancesInfo().values()
9631
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9632
    nlist = cfg.GetAllNodesInfo().values()
9633
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9634
    results = []
9635
    for path, target in tgts:
9636
      for tag in target.GetTags():
9637
        if self.re.search(tag):
9638
          results.append((path, tag))
9639
    return results
9640

    
9641

    
9642
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
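  # Opcode parameters: "duration" is the sleep time handed straight to
  # utils.TestDelay and the test_delay RPC, "on_master" selects whether to
  # sleep on the master, "on_nodes" is a (possibly empty) list of node names
  # to sleep on, and the optional "repeat" count (default 0, i.e. a single
  # run) is handled by CheckArguments and Exec below.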
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9713
  REQ_BGL = False
9714

    
9715
  def CheckArguments(self):
9716
    # TODO: convert to the type system
9717
    self.op.repeat = getattr(self.op, "repeat", 0)
9718
    if self.op.repeat < 0:
9719
      raise errors.OpPrereqError("Repetition count cannot be negative")
9720

    
9721
  def ExpandNames(self):
9722
    """Expand names and set required locks.
9723

9724
    This expands the node list, if any.
9725

9726
    """
9727
    self.needed_locks = {}
9728
    if self.op.on_nodes:
9729
      # _GetWantedNodes can be used here, but is not always appropriate to use
9730
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9731
      # more information.
9732
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9733
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9734

    
9735
  def CheckPrereq(self):
9736
    """Check prerequisites.
9737

9738
    """
9739

    
9740
  def _TestDelay(self):
9741
    """Do the actual sleep.
9742

9743
    """
9744
    if self.op.on_master:
9745
      if not utils.TestDelay(self.op.duration):
9746
        raise errors.OpExecError("Error during master delay test")
9747
    if self.op.on_nodes:
9748
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9749
      for node, node_result in result.items():
9750
        node_result.Raise("Failure during rpc call to node %s" % node)
9751

    
9752
  def Exec(self, feedback_fn):
9753
    """Execute the test delay opcode, with the wanted repetitions.
9754

9755
    """
9756
    if self.op.repeat == 0:
9757
      self._TestDelay()
9758
    else:
9759
      top_value = self.op.repeat - 1
9760
      for i in range(self.op.repeat):
9761
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9762
        self._TestDelay()
9763

    
9764

    
9765
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_text, in_data, out_text, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

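  # The structure serialized into self.in_text (and handed to the external
  # script by Run below) therefore looks roughly like:
  #
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {<node name>: {...}, ...},
  #    "instances": {<instance name>: {...}, ...},
  #    "request": {"type": <mode>, ...}}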
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

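  # The script is expected to return a serialized dict with at least the keys
  # "success", "info" and "result" (a list); older allocators that return
  # "nodes" instead of "result" are still accepted.  _ValidateResult below
  # enforces exactly this.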
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result