
root / lib / cmdlib.py @ 490acd18


#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have way too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611

# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)


# End types
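# Each _P* tuple above follows the (attribute name, default value, type
# check) convention consumed through _OP_PARAMS by LogicalUnit.__init__
# below.  A purely illustrative declaration for a hypothetical LU (not part
# of Ganeti) could look like:
#
#   class LUExampleInstanceOp(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,                    # required (ht.NoDefault)
#       _PForce,                           # optional, defaults to False
#       ("timeout", 60, ht.TPositiveInt),  # LU-specific parameter
#       ]
#
# Missing required parameters raise OpPrereqError, defaults are copied onto
# self.op, and every value must satisfy its ht check.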
class LogicalUnit(object):
92
  """Logical Unit base class.
93

94
  Subclasses must follow these rules:
95
    - implement ExpandNames
96
    - implement CheckPrereq (except when tasklets are used)
97
    - implement Exec (except when tasklets are used)
98
    - implement BuildHooksEnv
99
    - redefine HPATH and HTYPE
100
    - optionally redefine their run requirements:
101
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
102

103
  Note that all commands require root permissions.
104

105
  @ivar dry_run_result: the value (if any) that will be returned to the caller
106
      in dry-run mode (signalled by opcode dry_run parameter)
107
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
108
      they should get if not already defined, and types they must match
109

110
  """
111
  HPATH = None
112
  HTYPE = None
113
  _OP_PARAMS = []
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.context = context
127
    self.rpc = rpc
128
    # Dicts used to declare locking needs to mcpu
129
    self.needed_locks = None
130
    self.acquired_locks = {}
131
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
132
    self.add_locks = {}
133
    self.remove_locks = {}
134
    # Used to force good behavior when calling helper functions
135
    self.recalculate_locks = {}
136
    self.__ssh = None
137
    # logging
138
    self.Log = processor.Log # pylint: disable-msg=C0103
139
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
140
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
141
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
142
    # support for dry-run
143
    self.dry_run_result = None
144
    # support for generic debug attribute
145
    if (not hasattr(self.op, "debug_level") or
146
        not isinstance(self.op.debug_level, int)):
147
      self.op.debug_level = 0
148

    
149
    # Tasklets
150
    self.tasklets = None
151

    
152
    # The new kind-of-type-system
153
    op_id = self.op.OP_ID
154
    for attr_name, aval, test in self._OP_PARAMS:
155
      if not hasattr(op, attr_name):
156
        if aval == ht.NoDefault:
157
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
158
                                     (op_id, attr_name), errors.ECODE_INVAL)
159
        else:
160
          if callable(aval):
161
            dval = aval()
162
          else:
163
            dval = aval
164
          setattr(self.op, attr_name, dval)
165
      attr_val = getattr(op, attr_name)
166
      if test == ht.NoType:
167
        # no tests here
168
        continue
169
      if not callable(test):
170
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
171
                                     " given type is not a proper type (%s)" %
172
                                     (op_id, attr_name, test))
173
      if not test(attr_val):
174
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
175
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
176
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
177
                                   (op_id, attr_name), errors.ECODE_INVAL)
178

    
179
    self.CheckArguments()
180

    
181
  def __GetSSH(self):
182
    """Returns the SshRunner object
183

184
    """
185
    if not self.__ssh:
186
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
187
    return self.__ssh
188

    
189
  ssh = property(fget=__GetSSH)
190

    
191
  def CheckArguments(self):
192
    """Check syntactic validity for the opcode arguments.
193

194
    This method is for doing a simple syntactic check and ensuring
195
    validity of opcode parameters, without any cluster-related
196
    checks. While the same can be accomplished in ExpandNames and/or
197
    CheckPrereq, doing these separately is better because:
198

199
      - ExpandNames is left as purely a lock-related function
200
      - CheckPrereq is run after we have acquired locks (and possibly
201
        waited for them)
202

203
    The function is allowed to change the self.op attribute so that
204
    later methods can no longer worry about missing parameters.
205

206
    """
207
    pass
208

    
209
  def ExpandNames(self):
210
    """Expand names for this LU.
211

212
    This method is called before starting to execute the opcode, and it should
213
    update all the parameters of the opcode to their canonical form (e.g. a
214
    short node name must be fully expanded after this method has successfully
215
    completed). This way locking, hooks, logging, etc. can work correctly.
216

217
    LUs which implement this method must also populate the self.needed_locks
218
    member, as a dict with lock levels as keys, and a list of needed lock names
219
    as values. Rules:
220

221
      - use an empty dict if you don't need any lock
222
      - if you don't need any lock at a particular level omit that level
223
      - don't put anything for the BGL level
224
      - if you want all locks at a level use locking.ALL_SET as a value
225

226
    If you need to share locks (rather than acquire them exclusively) at one
227
    level you can modify self.share_locks, setting a true value (usually 1) for
228
    that level. By default locks are not shared.
229

230
    This function can also define a list of tasklets, which then will be
231
    executed in order instead of the usual LU-level CheckPrereq and Exec
232
    functions, if those are not defined by the LU.
233

234
    Examples::
235

236
      # Acquire all nodes and one instance
237
      self.needed_locks = {
238
        locking.LEVEL_NODE: locking.ALL_SET,
239
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
240
      }
241
      # Acquire just two nodes
242
      self.needed_locks = {
243
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
244
      }
245
      # Acquire no locks
246
      self.needed_locks = {} # No, you can't leave it to the default value None
247

248
    """
249
    # The implementation of this method is mandatory only if the new LU is
250
    # concurrent, so that old LUs don't need to be changed all at the same
251
    # time.
252
    if self.REQ_BGL:
253
      self.needed_locks = {} # Exclusive LUs don't need locks.
254
    else:
255
      raise NotImplementedError
256

    
257
  def DeclareLocks(self, level):
258
    """Declare LU locking needs for a level
259

260
    While most LUs can just declare their locking needs at ExpandNames time,
261
    sometimes there's the need to calculate some locks after having acquired
262
    the ones before. This function is called just before acquiring locks at a
263
    particular level, but after acquiring the ones at lower levels, and permits
264
    such calculations. It can be used to modify self.needed_locks, and by
265
    default it does nothing.
266

267
    This function is only called if you have something already set in
268
    self.needed_locks for the level.
269

270
    @param level: Locking level which is going to be locked
271
    @type level: member of ganeti.locking.LEVELS
272

273
    """
274

    
275
  def CheckPrereq(self):
276
    """Check prerequisites for this LU.
277

278
    This method should check that the prerequisites for the execution
279
    of this LU are fulfilled. It can do internode communication, but
280
    it should be idempotent - no cluster or system changes are
281
    allowed.
282

283
    The method should raise errors.OpPrereqError in case something is
284
    not fulfilled. Its return value is ignored.
285

286
    This method should also update all the parameters of the opcode to
287
    their canonical form if it hasn't been done by ExpandNames before.
288

289
    """
290
    if self.tasklets is not None:
291
      for (idx, tl) in enumerate(self.tasklets):
292
        logging.debug("Checking prerequisites for tasklet %s/%s",
293
                      idx + 1, len(self.tasklets))
294
        tl.CheckPrereq()
295
    else:
296
      pass
297

    
298
  def Exec(self, feedback_fn):
299
    """Execute the LU.
300

301
    This method should implement the actual work. It should raise
302
    errors.OpExecError for failures that are somewhat dealt with in
303
    code, or expected.
304

305
    """
306
    if self.tasklets is not None:
307
      for (idx, tl) in enumerate(self.tasklets):
308
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
309
        tl.Exec(feedback_fn)
310
    else:
311
      raise NotImplementedError
312

    
313
  def BuildHooksEnv(self):
314
    """Build hooks environment for this LU.
315

316
    This method should return a three-node tuple consisting of: a dict
317
    containing the environment that will be used for running the
318
    specific hook for this LU, a list of node names on which the hook
319
    should run before the execution, and a list of node names on which
320
    the hook should run after the execution.
321

322
    The keys of the dict must not have 'GANETI_' prefixed as this will
323
    be handled in the hooks runner. Also note additional keys will be
324
    added by the hooks runner. If the LU doesn't define any
325
    environment, an empty dict (and not None) should be returned.
326

327
    No nodes should be returned as an empty list (and not None).
328

329
    Note that if the HPATH for a LU class is None, this function will
330
    not be called.
331

332
    """
333
    raise NotImplementedError
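  # An illustrative (hypothetical) BuildHooksEnv for a single-instance LU,
  # matching the contract above: an env dict without the GANETI_ prefix plus
  # the pre- and post-execution node lists:
  #
  #   def BuildHooksEnv(self):
  #     env = _BuildInstanceHookEnvByObject(self, self.instance)
  #     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
  #     return env, nl, nl
  #
  # Real LUs in this module (e.g. LUPostInitCluster) return the same
  # (env, pre_nodes, post_nodes) shape.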
334

    
335
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
336
    """Notify the LU about the results of its hooks.
337

338
    This method is called every time a hooks phase is executed, and notifies
339
    the Logical Unit about the hooks' result. The LU can then use it to alter
340
    its result based on the hooks.  By default the method does nothing and the
341
    previous result is passed back unchanged but any LU can define it if it
342
    wants to use the local cluster hook-scripts somehow.
343

344
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
345
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
346
    @param hook_results: the results of the multi-node hooks rpc call
347
    @param feedback_fn: function used to send feedback back to the caller
348
    @param lu_result: the previous Exec result this LU had, or None
349
        in the PRE phase
350
    @return: the new Exec result, based on the previous result
351
        and hook results
352

353
    """
354
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function pylint warnings
356
    # pylint: disable-msg=W0613,R0201
357
    return lu_result
358

    
359
  def _ExpandAndLockInstance(self):
360
    """Helper function to expand and lock an instance.
361

362
    Many LUs that work on an instance take its name in self.op.instance_name
363
    and need to expand it and then declare the expanded name for locking. This
364
    function does it, and then updates self.op.instance_name to the expanded
365
    name. It also initializes needed_locks as a dict, if this hasn't been done
366
    before.
367

368
    """
369
    if self.needed_locks is None:
370
      self.needed_locks = {}
371
    else:
372
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
373
        "_ExpandAndLockInstance called with instance-level locks set"
374
    self.op.instance_name = _ExpandInstanceName(self.cfg,
375
                                                self.op.instance_name)
376
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
377

    
378
  def _LockInstancesNodes(self, primary_only=False):
379
    """Helper function to declare instances' nodes for locking.
380

381
    This function should be called after locking one or more instances to lock
382
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
383
    with all primary or secondary nodes for instances already locked and
384
    present in self.needed_locks[locking.LEVEL_INSTANCE].
385

386
    It should be called from DeclareLocks, and for safety only works if
387
    self.recalculate_locks[locking.LEVEL_NODE] is set.
388

389
    In the future it may grow parameters to just lock some instance's nodes, or
390
    to just lock primaries or secondary nodes, if needed.
391

392
    It should be called in DeclareLocks in a way similar to::
393

394
      if level == locking.LEVEL_NODE:
395
        self._LockInstancesNodes()
396

397
    @type primary_only: boolean
398
    @param primary_only: only lock primary nodes of locked instances
399

400
    """
401
    assert locking.LEVEL_NODE in self.recalculate_locks, \
402
      "_LockInstancesNodes helper function called with no nodes to recalculate"
403

    
404
    # TODO: check if we've really been called with the instance locks held
405

    
406
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
407
    # future we might want to have different behaviors depending on the value
408
    # of self.recalculate_locks[locking.LEVEL_NODE]
409
    wanted_nodes = []
410
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
411
      instance = self.context.cfg.GetInstanceInfo(instance_name)
412
      wanted_nodes.append(instance.primary_node)
413
      if not primary_only:
414
        wanted_nodes.extend(instance.secondary_nodes)
415

    
416
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
417
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
418
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
419
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
420

    
421
    del self.recalculate_locks[locking.LEVEL_NODE]
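# A minimal, purely illustrative sketch of a LogicalUnit subclass following
# the rules in the class docstring (the names below are hypothetical and do
# not exist in Ganeti):
#
#   class LUExampleQuery(NoHooksLU):
#     _OP_PARAMS = [_PNodeName]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#
#     def CheckPrereq(self):
#       pass  # only checks here, no cluster changes
#
#     def Exec(self, feedback_fn):
#       return self.cfg.GetNodeInfo(self.op.node_name).name
#
# Hook-running LUs would additionally set HPATH/HTYPE and implement
# BuildHooksEnv.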
422

    
423

    
424
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
425
  """Simple LU which runs no hooks.
426

427
  This LU is intended as a parent for other LogicalUnits which will
428
  run no hooks, in order to reduce duplicate code.
429

430
  """
431
  HPATH = None
432
  HTYPE = None
433

    
434
  def BuildHooksEnv(self):
435
    """Empty BuildHooksEnv for NoHooksLu.
436

437
    This just raises an error.
438

439
    """
440
    assert False, "BuildHooksEnv called for NoHooksLUs"
441

    
442

    
443
class Tasklet:
444
  """Tasklet base class.
445

446
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
448
  tasklets know nothing about locks.
449

450
  Subclasses must follow these rules:
451
    - Implement CheckPrereq
452
    - Implement Exec
453

454
  """
455
  def __init__(self, lu):
456
    self.lu = lu
457

    
458
    # Shortcuts
459
    self.cfg = lu.cfg
460
    self.rpc = lu.rpc
461

    
462
  def CheckPrereq(self):
463
    """Check prerequisites for this tasklets.
464

465
    This method should check whether the prerequisites for the execution of
466
    this tasklet are fulfilled. It can do internode communication, but it
467
    should be idempotent - no cluster or system changes are allowed.
468

469
    The method should raise errors.OpPrereqError in case something is not
470
    fulfilled. Its return value is ignored.
471

472
    This method should also update all parameters to their canonical form if it
473
    hasn't been done before.
474

475
    """
476
    pass
477

    
478
  def Exec(self, feedback_fn):
479
    """Execute the tasklet.
480

481
    This method should implement the actual work. It should raise
482
    errors.OpExecError for failures that are somewhat dealt with in code, or
483
    expected.
484

485
    """
486
    raise NotImplementedError
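  # Illustrative only: an LU that delegates its work to tasklets would
  # typically create them in ExpandNames, e.g.
  #
  #   def ExpandNames(self):
  #     ...
  #     # _ExampleTasklet is a hypothetical Tasklet subclass
  #     self.tasklets = [_ExampleTasklet(self, name) for name in names]
  #
  # LogicalUnit.CheckPrereq and LogicalUnit.Exec then run each tasklet's
  # CheckPrereq/Exec in order, as shown above.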
487

    
488

    
489
def _GetWantedNodes(lu, nodes):
490
  """Returns list of checked and expanded node names.
491

492
  @type lu: L{LogicalUnit}
493
  @param lu: the logical unit on whose behalf we execute
494
  @type nodes: list
495
  @param nodes: list of node names or None for all nodes
496
  @rtype: list
497
  @return: the list of nodes, sorted
498
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
499

500
  """
501
  if not nodes:
502
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
503
      " non-empty list of nodes whose name is to be expanded.")
504

    
505
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
506
  return utils.NiceSort(wanted)
507

    
508

    
509
def _GetWantedInstances(lu, instances):
510
  """Returns list of checked and expanded instance names.
511

512
  @type lu: L{LogicalUnit}
513
  @param lu: the logical unit on whose behalf we execute
514
  @type instances: list
515
  @param instances: list of instance names or None for all instances
516
  @rtype: list
517
  @return: the list of instances, sorted
518
  @raise errors.OpPrereqError: if the instances parameter is wrong type
519
  @raise errors.OpPrereqError: if any of the passed instances is not found
520

521
  """
522
  if instances:
523
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
524
  else:
525
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
526
  return wanted
527

    
528

    
529
def _GetUpdatedParams(old_params, update_dict,
530
                      use_default=True, use_none=False):
531
  """Return the new version of a parameter dictionary.
532

533
  @type old_params: dict
534
  @param old_params: old parameters
535
  @type update_dict: dict
536
  @param update_dict: dict containing new parameter values, or
537
      constants.VALUE_DEFAULT to reset the parameter to its default
538
      value
539
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
545
  @rtype: dict
546
  @return: the new parameter dictionary
547

548
  """
549
  params_copy = copy.deepcopy(old_params)
550
  for key, val in update_dict.iteritems():
551
    if ((use_default and val == constants.VALUE_DEFAULT) or
552
        (use_none and val is None)):
553
      try:
554
        del params_copy[key]
555
      except KeyError:
556
        pass
557
    else:
558
      params_copy[key] = val
559
  return params_copy
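# Illustrative example (hypothetical values) of how the helper above merges
# parameter dictionaries:
#
#   old = {"mem": 128, "vcpus": 2}
#   _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#     -> {"vcpus": 4}        # "mem" falls back to its default
#   _GetUpdatedParams(old, {"mem": None}, use_none=True)
#     -> {"vcpus": 2}        # None deletes the key when use_none is set
#
# The input dictionary is never modified; a deep copy is returned.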
560

    
561

    
562
def _CheckOutputFields(static, dynamic, selected):
563
  """Checks whether all selected fields are valid.
564

565
  @type static: L{utils.FieldSet}
566
  @param static: static fields set
567
  @type dynamic: L{utils.FieldSet}
568
  @param dynamic: dynamic fields set
569

570
  """
571
  f = utils.FieldSet()
572
  f.Extend(static)
573
  f.Extend(dynamic)
574

    
575
  delta = f.NonMatching(selected)
576
  if delta:
577
    raise errors.OpPrereqError("Unknown output fields selected: %s"
578
                               % ",".join(delta), errors.ECODE_INVAL)
579

    
580

    
581
def _CheckGlobalHvParams(params):
582
  """Validates that given hypervisor params are not global ones.
583

584
  This will ensure that instances don't get customised versions of
585
  global params.
586

587
  """
588
  used_globals = constants.HVC_GLOBALS.intersection(params)
589
  if used_globals:
590
    msg = ("The following hypervisor parameters are global and cannot"
591
           " be customized at instance level, please modify them at"
592
           " cluster level: %s" % utils.CommaJoin(used_globals))
593
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
594

    
595

    
596
def _CheckNodeOnline(lu, node):
597
  """Ensure that a given node is online.
598

599
  @param lu: the LU on behalf of which we make the check
600
  @param node: the node to check
601
  @raise errors.OpPrereqError: if the node is offline
602

603
  """
604
  if lu.cfg.GetNodeInfo(node).offline:
605
    raise errors.OpPrereqError("Can't use offline node %s" % node,
606
                               errors.ECODE_INVAL)
607

    
608

    
609
def _CheckNodeNotDrained(lu, node):
610
  """Ensure that a given node is not drained.
611

612
  @param lu: the LU on behalf of which we make the check
613
  @param node: the node to check
614
  @raise errors.OpPrereqError: if the node is drained
615

616
  """
617
  if lu.cfg.GetNodeInfo(node).drained:
618
    raise errors.OpPrereqError("Can't use drained node %s" % node,
619
                               errors.ECODE_INVAL)
620

    
621

    
622
def _CheckNodeHasOS(lu, node, os_name, force_variant):
623
  """Ensure that a node supports a given OS.
624

625
  @param lu: the LU on behalf of which we make the check
626
  @param node: the node to check
627
  @param os_name: the OS to query about
628
  @param force_variant: whether to ignore variant errors
629
  @raise errors.OpPrereqError: if the node is not supporting the OS
630

631
  """
632
  result = lu.rpc.call_os_get(node, os_name)
633
  result.Raise("OS '%s' not in supported OS list for node %s" %
634
               (os_name, node),
635
               prereq=True, ecode=errors.ECODE_INVAL)
636
  if not force_variant:
637
    _CheckOSVariant(result.payload, os_name)
638

    
639

    
640
def _RequireFileStorage():
641
  """Checks that file storage is enabled.
642

643
  @raise errors.OpPrereqError: when file storage is disabled
644

645
  """
646
  if not constants.ENABLE_FILE_STORAGE:
647
    raise errors.OpPrereqError("File storage disabled at configure time",
648
                               errors.ECODE_INVAL)
649

    
650

    
651
def _CheckDiskTemplate(template):
652
  """Ensure a given disk template is valid.
653

654
  """
655
  if template not in constants.DISK_TEMPLATES:
656
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
657
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
658
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
659
  if template == constants.DT_FILE:
660
    _RequireFileStorage()
661
  return True
662

    
663

    
664
def _CheckStorageType(storage_type):
665
  """Ensure a given storage type is valid.
666

667
  """
668
  if storage_type not in constants.VALID_STORAGE_TYPES:
669
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
670
                               errors.ECODE_INVAL)
671
  if storage_type == constants.ST_FILE:
672
    _RequireFileStorage()
673
  return True
674

    
675

    
676
def _GetClusterDomainSecret():
677
  """Reads the cluster domain secret.
678

679
  """
680
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
681
                               strict=True)
682

    
683

    
684
def _CheckInstanceDown(lu, instance, reason):
685
  """Ensure that an instance is not running."""
686
  if instance.admin_up:
687
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
688
                               (instance.name, reason), errors.ECODE_STATE)
689

    
690
  pnode = instance.primary_node
691
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
692
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
693
              prereq=True, ecode=errors.ECODE_ENVIRON)
694

    
695
  if instance.name in ins_l.payload:
696
    raise errors.OpPrereqError("Instance %s is running, %s" %
697
                               (instance.name, reason), errors.ECODE_STATE)
698

    
699

    
700
def _ExpandItemName(fn, name, kind):
701
  """Expand an item name.
702

703
  @param fn: the function to use for expansion
704
  @param name: requested item name
705
  @param kind: text description ('Node' or 'Instance')
706
  @return: the resolved (full) name
707
  @raise errors.OpPrereqError: if the item is not found
708

709
  """
710
  full_name = fn(name)
711
  if full_name is None:
712
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
713
                               errors.ECODE_NOENT)
714
  return full_name
715

    
716

    
717
def _ExpandNodeName(cfg, name):
718
  """Wrapper over L{_ExpandItemName} for nodes."""
719
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
720

    
721

    
722
def _ExpandInstanceName(cfg, name):
723
  """Wrapper over L{_ExpandItemName} for instance."""
724
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
725

    
726

    
727
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
728
                          memory, vcpus, nics, disk_template, disks,
729
                          bep, hvp, hypervisor_name):
730
  """Builds instance related env variables for hooks
731

732
  This builds the hook environment from individual variables.
733

734
  @type name: string
735
  @param name: the name of the instance
736
  @type primary_node: string
737
  @param primary_node: the name of the instance's primary node
738
  @type secondary_nodes: list
739
  @param secondary_nodes: list of secondary nodes as strings
740
  @type os_type: string
741
  @param os_type: the name of the instance's OS
742
  @type status: boolean
743
  @param status: the should_run status of the instance
744
  @type memory: string
745
  @param memory: the memory size of the instance
746
  @type vcpus: string
747
  @param vcpus: the count of VCPUs the instance has
748
  @type nics: list
749
  @param nics: list of tuples (ip, mac, mode, link) representing
750
      the NICs the instance has
751
  @type disk_template: string
752
  @param disk_template: the disk template of the instance
753
  @type disks: list
754
  @param disks: the list of (size, mode) pairs
755
  @type bep: dict
756
  @param bep: the backend parameters for the instance
757
  @type hvp: dict
758
  @param hvp: the hypervisor parameters for the instance
759
  @type hypervisor_name: string
760
  @param hypervisor_name: the hypervisor for the instance
761
  @rtype: dict
762
  @return: the hook environment for this instance
763

764
  """
765
  if status:
766
    str_status = "up"
767
  else:
768
    str_status = "down"
769
  env = {
770
    "OP_TARGET": name,
771
    "INSTANCE_NAME": name,
772
    "INSTANCE_PRIMARY": primary_node,
773
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
774
    "INSTANCE_OS_TYPE": os_type,
775
    "INSTANCE_STATUS": str_status,
776
    "INSTANCE_MEMORY": memory,
777
    "INSTANCE_VCPUS": vcpus,
778
    "INSTANCE_DISK_TEMPLATE": disk_template,
779
    "INSTANCE_HYPERVISOR": hypervisor_name,
780
  }
781

    
782
  if nics:
783
    nic_count = len(nics)
784
    for idx, (ip, mac, mode, link) in enumerate(nics):
785
      if ip is None:
786
        ip = ""
787
      env["INSTANCE_NIC%d_IP" % idx] = ip
788
      env["INSTANCE_NIC%d_MAC" % idx] = mac
789
      env["INSTANCE_NIC%d_MODE" % idx] = mode
790
      env["INSTANCE_NIC%d_LINK" % idx] = link
791
      if mode == constants.NIC_MODE_BRIDGED:
792
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
793
  else:
794
    nic_count = 0
795

    
796
  env["INSTANCE_NIC_COUNT"] = nic_count
797

    
798
  if disks:
799
    disk_count = len(disks)
800
    for idx, (size, mode) in enumerate(disks):
801
      env["INSTANCE_DISK%d_SIZE" % idx] = size
802
      env["INSTANCE_DISK%d_MODE" % idx] = mode
803
  else:
804
    disk_count = 0
805

    
806
  env["INSTANCE_DISK_COUNT"] = disk_count
807

    
808
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
809
    for key, value in source.items():
810
      env["INSTANCE_%s_%s" % (kind, key)] = value
811

    
812
  return env
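# For a hypothetical single-NIC, single-disk instance the function above
# would produce entries such as (illustrative values only):
#
#   OP_TARGET=inst1.example.com         INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com  INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1                INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1               INSTANCE_DISK0_SIZE=1024
#   INSTANCE_BE_memory=128              INSTANCE_HV_kernel_path=...
#
# The hooks runner later adds the GANETI_ prefix to these keys.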
813

    
814

    
815
def _NICListToTuple(lu, nics):
816
  """Build a list of nic information tuples.
817

818
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
819
  value in LUQueryInstanceData.
820

821
  @type lu:  L{LogicalUnit}
822
  @param lu: the logical unit on whose behalf we execute
823
  @type nics: list of L{objects.NIC}
824
  @param nics: list of nics to convert to hooks tuples
825

826
  """
827
  hooks_nics = []
828
  cluster = lu.cfg.GetClusterInfo()
829
  for nic in nics:
830
    ip = nic.ip
831
    mac = nic.mac
832
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
833
    mode = filled_params[constants.NIC_MODE]
834
    link = filled_params[constants.NIC_LINK]
835
    hooks_nics.append((ip, mac, mode, link))
836
  return hooks_nics
837

    
838

    
839
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
840
  """Builds instance related env variables for hooks from an object.
841

842
  @type lu: L{LogicalUnit}
843
  @param lu: the logical unit on whose behalf we execute
844
  @type instance: L{objects.Instance}
845
  @param instance: the instance for which we should build the
846
      environment
847
  @type override: dict
848
  @param override: dictionary with key/values that will override
849
      our values
850
  @rtype: dict
851
  @return: the hook environment dictionary
852

853
  """
854
  cluster = lu.cfg.GetClusterInfo()
855
  bep = cluster.FillBE(instance)
856
  hvp = cluster.FillHV(instance)
857
  args = {
858
    'name': instance.name,
859
    'primary_node': instance.primary_node,
860
    'secondary_nodes': instance.secondary_nodes,
861
    'os_type': instance.os,
862
    'status': instance.admin_up,
863
    'memory': bep[constants.BE_MEMORY],
864
    'vcpus': bep[constants.BE_VCPUS],
865
    'nics': _NICListToTuple(lu, instance.nics),
866
    'disk_template': instance.disk_template,
867
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
868
    'bep': bep,
869
    'hvp': hvp,
870
    'hypervisor_name': instance.hypervisor,
871
  }
872
  if override:
873
    args.update(override)
874
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
875

    
876

    
877
def _AdjustCandidatePool(lu, exceptions):
878
  """Adjust the candidate pool after node operations.
879

880
  """
881
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
882
  if mod_list:
883
    lu.LogInfo("Promoted nodes to master candidate role: %s",
884
               utils.CommaJoin(node.name for node in mod_list))
885
    for name in mod_list:
886
      lu.context.ReaddNode(name)
887
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
888
  if mc_now > mc_max:
889
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
890
               (mc_now, mc_max))
891

    
892

    
893
def _DecideSelfPromotion(lu, exceptions=None):
894
  """Decide whether I should promote myself as a master candidate.
895

896
  """
897
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
898
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
899
  # the new node will increase mc_max by one, so:
900
  mc_should = min(mc_should + 1, cp_size)
901
  return mc_now < mc_should
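# Worked example (hypothetical numbers): with candidate_pool_size = 10,
# mc_now = 3 and mc_should = 3, adding this node gives
# mc_should = min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself.
# With the pool already full (mc_now = mc_should = cp_size) the result is
# False.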
902

    
903

    
904
def _CheckNicsBridgesExist(lu, target_nics, target_node):
905
  """Check that the brigdes needed by a list of nics exist.
906

907
  """
908
  cluster = lu.cfg.GetClusterInfo()
909
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
910
  brlist = [params[constants.NIC_LINK] for params in paramslist
911
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
912
  if brlist:
913
    result = lu.rpc.call_bridges_exist(target_node, brlist)
914
    result.Raise("Error checking bridges on destination node '%s'" %
915
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
916

    
917

    
918
def _CheckInstanceBridgesExist(lu, instance, node=None):
919
  """Check that the brigdes needed by an instance exist.
920

921
  """
922
  if node is None:
923
    node = instance.primary_node
924
  _CheckNicsBridgesExist(lu, instance.nics, node)
925

    
926

    
927
def _CheckOSVariant(os_obj, name):
928
  """Check whether an OS name conforms to the os variants specification.
929

930
  @type os_obj: L{objects.OS}
931
  @param os_obj: OS object to check
932
  @type name: string
933
  @param name: OS name passed by the user, to check for validity
934

935
  """
936
  if not os_obj.supported_variants:
937
    return
938
  variant = objects.OS.GetVariant(name)
939
  if not variant:
940
    raise errors.OpPrereqError("OS name must include a variant",
941
                               errors.ECODE_INVAL)
942

    
943
  if variant not in os_obj.supported_variants:
944
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
945

    
946

    
947
def _GetNodeInstancesInner(cfg, fn):
948
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
949

    
950

    
951
def _GetNodeInstances(cfg, node_name):
952
  """Returns a list of all primary and secondary instances on a node.
953

954
  """
955

    
956
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
957

    
958

    
959
def _GetNodePrimaryInstances(cfg, node_name):
960
  """Returns primary instances on a node.
961

962
  """
963
  return _GetNodeInstancesInner(cfg,
964
                                lambda inst: node_name == inst.primary_node)
965

    
966

    
967
def _GetNodeSecondaryInstances(cfg, node_name):
968
  """Returns secondary instances on a node.
969

970
  """
971
  return _GetNodeInstancesInner(cfg,
972
                                lambda inst: node_name in inst.secondary_nodes)
973

    
974

    
975
def _GetStorageTypeArgs(cfg, storage_type):
976
  """Returns the arguments for a storage type.
977

978
  """
979
  # Special case for file storage
980
  if storage_type == constants.ST_FILE:
981
    # storage.FileStorage wants a list of storage directories
982
    return [[cfg.GetFileStorageDir()]]
983

    
984
  return []
985

    
986

    
987
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
988
  faulty = []
989

    
990
  for dev in instance.disks:
991
    cfg.SetDiskID(dev, node_name)
992

    
993
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
994
  result.Raise("Failed to get disk status from node %s" % node_name,
995
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
996

    
997
  for idx, bdev_status in enumerate(result.payload):
998
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
999
      faulty.append(idx)
1000

    
1001
  return faulty
1002

    
1003

    
1004
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1005
  """Check the sanity of iallocator and node arguments and use the
1006
  cluster-wide iallocator if appropriate.
1007

1008
  Check that at most one of (iallocator, node) is specified. If none is
1009
  specified, then the LU's opcode's iallocator slot is filled with the
1010
  cluster-wide default iallocator.
1011

1012
  @type iallocator_slot: string
1013
  @param iallocator_slot: the name of the opcode iallocator slot
1014
  @type node_slot: string
1015
  @param node_slot: the name of the opcode target node slot
1016

1017
  """
1018
  node = getattr(lu.op, node_slot, None)
1019
  iallocator = getattr(lu.op, iallocator_slot, None)
1020

    
1021
  if node is not None and iallocator is not None:
1022
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1023
                               errors.ECODE_INVAL)
1024
  elif node is None and iallocator is None:
1025
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1026
    if default_iallocator:
1027
      setattr(lu.op, iallocator_slot, default_iallocator)
1028
    else:
1029
      raise errors.OpPrereqError("No iallocator or node given and no"
1030
                                 " cluster-wide default iallocator found."
1031
                                 " Please specify either an iallocator or a"
1032
                                 " node, or set a cluster-wide default"
1033
                                 " iallocator.")
1034

    
1035

    
1036
class LUPostInitCluster(LogicalUnit):
1037
  """Logical unit for running hooks after cluster initialization.
1038

1039
  """
1040
  HPATH = "cluster-init"
1041
  HTYPE = constants.HTYPE_CLUSTER
1042

    
1043
  def BuildHooksEnv(self):
1044
    """Build hooks env.
1045

1046
    """
1047
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1048
    mn = self.cfg.GetMasterNode()
1049
    return env, [], [mn]
1050

    
1051
  def Exec(self, feedback_fn):
1052
    """Nothing to do.
1053

1054
    """
1055
    return True
1056

    
1057

    
1058
class LUDestroyCluster(LogicalUnit):
1059
  """Logical unit for destroying the cluster.
1060

1061
  """
1062
  HPATH = "cluster-destroy"
1063
  HTYPE = constants.HTYPE_CLUSTER
1064

    
1065
  def BuildHooksEnv(self):
1066
    """Build hooks env.
1067

1068
    """
1069
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1070
    return env, [], []
1071

    
1072
  def CheckPrereq(self):
1073
    """Check prerequisites.
1074

1075
    This checks whether the cluster is empty.
1076

1077
    Any errors are signaled by raising errors.OpPrereqError.
1078

1079
    """
1080
    master = self.cfg.GetMasterNode()
1081

    
1082
    nodelist = self.cfg.GetNodeList()
1083
    if len(nodelist) != 1 or nodelist[0] != master:
1084
      raise errors.OpPrereqError("There are still %d node(s) in"
1085
                                 " this cluster." % (len(nodelist) - 1),
1086
                                 errors.ECODE_INVAL)
1087
    instancelist = self.cfg.GetInstanceList()
1088
    if instancelist:
1089
      raise errors.OpPrereqError("There are still %d instance(s) in"
1090
                                 " this cluster." % len(instancelist),
1091
                                 errors.ECODE_INVAL)
1092

    
1093
  def Exec(self, feedback_fn):
1094
    """Destroys the cluster.
1095

1096
    """
1097
    master = self.cfg.GetMasterNode()
1098

    
1099
    # Run post hooks on master node before it's removed
1100
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1101
    try:
1102
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1103
    except:
1104
      # pylint: disable-msg=W0702
1105
      self.LogWarning("Errors occurred running hooks on %s" % master)
1106

    
1107
    result = self.rpc.call_node_stop_master(master, False)
1108
    result.Raise("Could not disable the master role")
1109

    
1110
    return master
1111

    
1112

    
1113
def _VerifyCertificate(filename):
1114
  """Verifies a certificate for LUVerifyCluster.
1115

1116
  @type filename: string
1117
  @param filename: Path to PEM file
1118

1119
  """
1120
  try:
1121
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1122
                                           utils.ReadFile(filename))
1123
  except Exception, err: # pylint: disable-msg=W0703
1124
    return (LUVerifyCluster.ETYPE_ERROR,
1125
            "Failed to load X509 certificate %s: %s" % (filename, err))
1126

    
1127
  (errcode, msg) = \
1128
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1129
                                constants.SSL_CERT_EXPIRATION_ERROR)
1130

    
1131
  if msg:
1132
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1133
  else:
1134
    fnamemsg = None
1135

    
1136
  if errcode is None:
1137
    return (None, fnamemsg)
1138
  elif errcode == utils.CERT_WARNING:
1139
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1140
  elif errcode == utils.CERT_ERROR:
1141
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1142

    
1143
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1144

    
1145

    
1146
class LUVerifyCluster(LogicalUnit):
1147
  """Verifies the cluster status.
1148

1149
  """
1150
  HPATH = "cluster-verify"
1151
  HTYPE = constants.HTYPE_CLUSTER
1152
  _OP_PARAMS = [
1153
    ("skip_checks", ht.EmptyList,
1154
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1155
    ("verbose", False, ht.TBool),
1156
    ("error_codes", False, ht.TBool),
1157
    ("debug_simulate_errors", False, ht.TBool),
1158
    ]
1159
  REQ_BGL = False
1160

    
1161
  TCLUSTER = "cluster"
1162
  TNODE = "node"
1163
  TINSTANCE = "instance"
1164

    
1165
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1166
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1167
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1168
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1169
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1170
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1172
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1173
  ENODEDRBD = (TNODE, "ENODEDRBD")
1174
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1175
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1176
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1177
  ENODEHV = (TNODE, "ENODEHV")
1178
  ENODELVM = (TNODE, "ENODELVM")
1179
  ENODEN1 = (TNODE, "ENODEN1")
1180
  ENODENET = (TNODE, "ENODENET")
1181
  ENODEOS = (TNODE, "ENODEOS")
1182
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1183
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1184
  ENODERPC = (TNODE, "ENODERPC")
1185
  ENODESSH = (TNODE, "ENODESSH")
1186
  ENODEVERSION = (TNODE, "ENODEVERSION")
1187
  ENODESETUP = (TNODE, "ENODESETUP")
1188
  ENODETIME = (TNODE, "ENODETIME")
1189

    
1190
  ETYPE_FIELD = "code"
1191
  ETYPE_ERROR = "ERROR"
1192
  ETYPE_WARNING = "WARNING"
1193

    
1194
  class NodeImage(object):
1195
    """A class representing the logical and physical status of a node.
1196

1197
    @type name: string
1198
    @ivar name: the node name to which this object refers
1199
    @ivar volumes: a structure as returned from
1200
        L{ganeti.backend.GetVolumeList} (runtime)
1201
    @ivar instances: a list of running instances (runtime)
1202
    @ivar pinst: list of configured primary instances (config)
1203
    @ivar sinst: list of configured secondary instances (config)
1204
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1205
        of this node (config)
1206
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1207
    @ivar dfree: free disk, as reported by the node (runtime)
1208
    @ivar offline: the offline status (config)
1209
    @type rpc_fail: boolean
1210
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1211
        not whether the individual keys were correct) (runtime)
1212
    @type lvm_fail: boolean
1213
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1214
    @type hyp_fail: boolean
1215
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1216
    @type ghost: boolean
1217
    @ivar ghost: whether this is a known node or not (config)
1218
    @type os_fail: boolean
1219
    @ivar os_fail: whether the RPC call didn't return valid OS data
1220
    @type oslist: list
1221
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1222

1223
    """
1224
    def __init__(self, offline=False, name=None):
1225
      self.name = name
1226
      self.volumes = {}
1227
      self.instances = []
1228
      self.pinst = []
1229
      self.sinst = []
1230
      self.sbp = {}
1231
      self.mfree = 0
1232
      self.dfree = 0
1233
      self.offline = offline
1234
      self.rpc_fail = False
1235
      self.lvm_fail = False
1236
      self.hyp_fail = False
1237
      self.ghost = False
1238
      self.os_fail = False
1239
      self.oslist = {}
1240

    
1241
  def ExpandNames(self):
1242
    self.needed_locks = {
1243
      locking.LEVEL_NODE: locking.ALL_SET,
1244
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1245
    }
1246
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1247

    
1248
  def _Error(self, ecode, item, msg, *args, **kwargs):
1249
    """Format an error message.
1250

1251
    Based on the opcode's error_codes parameter, either format a
1252
    parseable error code, or a simpler error string.
1253

1254
    This must be called only from Exec and functions called from Exec.
1255

1256
    """
1257
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1258
    itype, etxt = ecode
1259
    # first complete the msg
1260
    if args:
1261
      msg = msg % args
1262
    # then format the whole message
1263
    if self.op.error_codes:
1264
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1265
    else:
1266
      if item:
1267
        item = " " + item
1268
      else:
1269
        item = ""
1270
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1271
    # and finally report it via the feedback_fn
1272
    self._feedback_fn("  - %s" % msg)
1273

    
1274
  def _ErrorIf(self, cond, *args, **kwargs):
1275
    """Log an error message if the passed condition is True.
1276

1277
    """
1278
    cond = bool(cond) or self.op.debug_simulate_errors
1279
    if cond:
1280
      self._Error(*args, **kwargs)
1281
    # do not mark the operation as failed for WARN cases only
1282
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1283
      self.bad = self.bad or cond
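  # Illustrative output of _Error for a hypothetical failing node check:
  #
  #   with op.error_codes set (parseable form):
  #     "  - ERROR:ENODESSH:node:node1.example.com:ssh communication failed"
  #   without op.error_codes (human-readable form):
  #     "  - ERROR: node node1.example.com: ssh communication failed"
  #
  # i.e. "ltype:etxt:itype:item:msg" versus "ltype: itype item: msg", both
  # emitted through self._feedback_fn.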
1284

    
1285
  def _VerifyNode(self, ninfo, nresult):
1286
    """Perform some basic validation on data returned from a node.
1287

1288
      - check the result data structure is well formed and has all the
1289
        mandatory fields
1290
      - check ganeti version
1291

1292
    @type ninfo: L{objects.Node}
1293
    @param ninfo: the node to check
1294
    @param nresult: the results from the node
1295
    @rtype: boolean
1296
    @return: whether overall this call was successful (and we can expect
1297
         reasonable values in the response)
1298

1299
    """
1300
    node = ninfo.name
1301
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1302

    
1303
    # main result, nresult should be a non-empty dict
1304
    test = not nresult or not isinstance(nresult, dict)
1305
    _ErrorIf(test, self.ENODERPC, node,
1306
                  "unable to verify node: no data returned")
1307
    if test:
1308
      return False
1309

    
1310
    # compares ganeti version
1311
    local_version = constants.PROTOCOL_VERSION
1312
    remote_version = nresult.get("version", None)
1313
    test = not (remote_version and
1314
                isinstance(remote_version, (list, tuple)) and
1315
                len(remote_version) == 2)
1316
    _ErrorIf(test, self.ENODERPC, node,
1317
             "connection to node returned invalid data")
1318
    if test:
1319
      return False
1320

    
1321
    test = local_version != remote_version[0]
1322
    _ErrorIf(test, self.ENODEVERSION, node,
1323
             "incompatible protocol versions: master %s,"
1324
             " node %s", local_version, remote_version[0])
1325
    if test:
1326
      return False
1327

    
1328
    # node seems compatible, we can actually try to look into its results
1329

    
1330
    # full package version
1331
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1332
                  self.ENODEVERSION, node,
1333
                  "software version mismatch: master %s, node %s",
1334
                  constants.RELEASE_VERSION, remote_version[1],
1335
                  code=self.ETYPE_WARNING)
1336

    
1337
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1338
    if isinstance(hyp_result, dict):
1339
      for hv_name, hv_result in hyp_result.iteritems():
1340
        test = hv_result is not None
1341
        _ErrorIf(test, self.ENODEHV, node,
1342
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1343

    
1344

    
1345
    test = nresult.get(constants.NV_NODESETUP,
1346
                           ["Missing NODESETUP results"])
1347
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1348
             "; ".join(test))
1349

    
1350
    return True
1351

    
1352
  def _VerifyNodeTime(self, ninfo, nresult,
1353
                      nvinfo_starttime, nvinfo_endtime):
1354
    """Check the node time.
1355

1356
    @type ninfo: L{objects.Node}
1357
    @param ninfo: the node to check
1358
    @param nresult: the remote results for the node
1359
    @param nvinfo_starttime: the start time of the RPC call
1360
    @param nvinfo_endtime: the end time of the RPC call
1361

1362
    """
1363
    node = ninfo.name
1364
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1365

    
1366
    ntime = nresult.get(constants.NV_TIME, None)
1367
    try:
1368
      ntime_merged = utils.MergeTime(ntime)
1369
    except (ValueError, TypeError):
1370
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1371
      return
1372

    
1373
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1374
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1375
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1376
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1377
    else:
1378
      ntime_diff = None
1379

    
1380
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1381
             "Node time diverges by at least %s from master node time",
1382
             ntime_diff)
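  # Illustrative numbers: with NODE_MAX_CLOCK_SKEW of, say, 150 seconds, a
  # node whose merged time is 200 seconds before nvinfo_starttime yields
  # ntime_diff = "200.0s" and triggers ENODETIME; anything within
  # [starttime - skew, endtime + skew] passes silently.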
1383

    
1384
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1385
    """Check the node time.
1386

1387
    @type ninfo: L{objects.Node}
1388
    @param ninfo: the node to check
1389
    @param nresult: the remote results for the node
1390
    @param vg_name: the configured VG name
1391

1392
    """
1393
    if vg_name is None:
1394
      return
1395

    
1396
    node = ninfo.name
1397
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1398

    
1399
    # checks vg existence and size > 20G
1400
    vglist = nresult.get(constants.NV_VGLIST, None)
1401
    test = not vglist
1402
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1403
    if not test:
1404
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1405
                                            constants.MIN_VG_SIZE)
1406
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1407

    
1408
    # check pv names
1409
    pvlist = nresult.get(constants.NV_PVLIST, None)
1410
    test = pvlist is None
1411
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1412
    if not test:
1413
      # check that ':' is not present in PV names, since it's a
1414
      # special character for lvcreate (denotes the range of PEs to
1415
      # use on the PV)
1416
      for _, pvname, owner_vg in pvlist:
1417
        test = ":" in pvname
1418
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1419
                 " '%s' of VG '%s'", pvname, owner_vg)
1420

    
1421
  def _VerifyNodeNetwork(self, ninfo, nresult):
1422
    """Check the node time.
1423

1424
    @type ninfo: L{objects.Node}
1425
    @param ninfo: the node to check
1426
    @param nresult: the remote results for the node
1427

1428
    """
1429
    node = ninfo.name
1430
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1431

    
1432
    test = constants.NV_NODELIST not in nresult
1433
    _ErrorIf(test, self.ENODESSH, node,
1434
             "node hasn't returned node ssh connectivity data")
1435
    if not test:
1436
      if nresult[constants.NV_NODELIST]:
1437
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1438
          _ErrorIf(True, self.ENODESSH, node,
1439
                   "ssh communication with node '%s': %s", a_node, a_msg)
1440

    
1441
    test = constants.NV_NODENETTEST not in nresult
1442
    _ErrorIf(test, self.ENODENET, node,
1443
             "node hasn't returned node tcp connectivity data")
1444
    if not test:
1445
      if nresult[constants.NV_NODENETTEST]:
1446
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1447
        for anode in nlist:
1448
          _ErrorIf(True, self.ENODENET, node,
1449
                   "tcp communication with node '%s': %s",
1450
                   anode, nresult[constants.NV_NODENETTEST][anode])
1451

    
1452
    test = constants.NV_MASTERIP not in nresult
1453
    _ErrorIf(test, self.ENODENET, node,
1454
             "node hasn't returned node master IP reachability data")
1455
    if not test:
1456
      if not nresult[constants.NV_MASTERIP]:
1457
        if node == self.master_node:
1458
          msg = "the master node cannot reach the master IP (not configured?)"
1459
        else:
1460
          msg = "cannot reach the master IP"
1461
        _ErrorIf(True, self.ENODENET, node, msg)
1462

    
1463

    
1464
  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

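  # Rough worked example (made-up numbers): if a node is secondary for two
  # auto-balanced instances of the same primary, with BE_MEMORY 2048 and
  # 4096, then needed_mem = 2048 + 4096 = 6144 and ENODEN1 is reported
  # whenever the node's mfree is below 6144, i.e. it could not absorb a
  # failover from that primary node.
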
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

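  # Summary of the checks above: must_have is true when the file is not a
  # master-only file, or when the node is a master candidate. Then:
  #   missing        and must_have     -> "file missing"
  #   wrong checksum and must_have     -> "wrong checksum"
  #   wrong checksum and not must_have -> "should not exist (outdated copy)"
  #   good checksum  and not must_have -> "should not exist on non candidates"
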
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

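  # For illustration (made-up data): with drbd_map[node] == {0: "inst1",
  # 1: "ghost"} and "ghost" unknown to instanceinfo, node_drbd becomes
  # {0: ("inst1", <admin_up>), 1: ("ghost", False)}. A used minor that is
  # not in node_drbd is then reported as unallocated, while an allocated
  # minor missing from used_minors is reported as inactive if must_exist.
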
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

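  # For illustration (made-up payload): each NV_OSLIST entry is a 7-element
  # list, roughly
  #   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
  #    ["default"], [...], [15, 20]]
  # i.e. name, path, status, diagnose message, variants, parameters and
  # API versions; grouping by name lets _VerifyNodeOS detect duplicate
  # definitions of the same OS on a node.
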
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

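  # For illustration (made-up values): a healthy node would return roughly
  #   nresult[constants.NV_HVINFO] == {"memory_free": 7680, ...}
  #   nresult[constants.NV_VGLIST] == {"xenvg": 153600, ...}
  # from which mfree and dfree are filled in; non-numeric values are
  # reported as ENODERPC rather than silently ignored.
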
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of the cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                           all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


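# Note on LUVerifyDisks.Exec above (illustrative, made-up key): nv_dict maps
# (node, volume) pairs to instance objects, e.g.
#   {("node1.example.com", "<lv name>"): <instance object>, ...}
# Pairs that show up in the lv_list results are popped while scanning, so
# whatever is left in nv_dict afterwards is by construction a missing LV and
# ends up in the per-instance res_missing lists returned to the caller.

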
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

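  # Rough trace (made-up sizes): for a DRBD8 disk of size 10240 whose data
  # child reports 10112, the child is grown to 10240 and the method recurses
  # into that child; since the child is not itself DRBD8 the recursion
  # returns False, so the overall result is True and the caller knows the
  # configuration has to be written out.
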
  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("vg_name", None, ht.TMaybeString),
    ("enabled_hypervisors", None,
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
            ht.TNone)),
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("beparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                            ht.TNone)),
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
    ("uid_pool", None, ht.NoType),
    ("add_uids", None, ht.NoType),
    ("remove_uids", None, ht.NoType),
    ("maintain_node_health", None, ht.TMaybeBool),
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
    ("hidden_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring", val, desc)
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring", val, desc)
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    self.cfg.Update(self.cluster, feedback_fn)


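# Note on LUSetClusterParams.CheckPrereq above (illustrative keys): parameter
# dictionaries are layered with objects.FillDict, defaults first and
# overrides last, roughly
#   objects.FillDict({"root_path": "/dev/vda", "kernel_args": "ro"},
#                    {"root_path": "/dev/xvda"})
#   == {"root_path": "/dev/xvda", "kernel_args": "ro"}
# which is why the per-OS hypervisor parameters are validated only after
# being filled on top of the (possibly updated) cluster-wide hvparams.

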
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


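# Typical invocations of _RedistributeAncillaryFiles, as used by LURemoveNode
# and LUAddNode further below in this module:
#
#   _RedistributeAncillaryFiles(self)                            # node removal
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])   # node add
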
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


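# Illustrative sketch of the TODO in _WaitForSync above: the manual polling
# loop could be expressed with utils.Retry.  This hypothetical helper is not
# wired into the code above or below; it assumes utils.Retry's RetryAgain
# protocol (raise utils.RetryAgain to be called again until the timeout
# expires, at which point utils.RetryTimeout propagates to the caller).
def _WaitForSyncRetrySketch(lu, node, disks, timeout):
  """Illustrative sketch only; see the comment block above."""
  def _CheckAllSynced():
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    if rstats.fail_msg:
      # RPC failure: poll again until the timeout expires
      raise utils.RetryAgain()
    if compat.any(mstat is not None and mstat.sync_percent is not None
                  for mstat in rstats.payload):
      # at least one mirror is still syncing
      raise utils.RetryAgain()
    return rstats.payload

  return utils.Retry(_CheckAllSynced, 6.0, timeout)
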
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


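# Illustrative calls for the helper above (hypothetical variable names):
# checking overall mirror health on the primary node versus only the local
# storage status on a secondary node:
#
#   healthy = _CheckDiskConsistency(self, dev, instance.primary_node, True)
#   local_ok = _CheckDiskConsistency(self, dev, sec_node, False, ldisk=True)
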
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


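# Illustrative note on the filtering in LUDiagnoseOS.Exec above: hidden,
# blacklisted or invalid OSes are skipped unless the corresponding field is
# explicitly requested.  For example, a hypothetical query with
#
#   output_fields = ["name", "valid", "hidden"]
#
# would list hidden OSes (with the "hidden" column set to True) but would
# still drop blacklisted ones, since "blacklisted" was not requested.
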
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


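# Illustrative example (hypothetical field values) of the field handling in
# LUQueryNodeStorage.Exec above: with output_fields = ["size", "used"] the
# name field is prepended for sorting, so the backend is asked for
# fields = ["name", "size", "used"] and field_idx becomes
# {"name": 0, "size": 1, "used": 2}; "node" and "type" are never requested
# from the backend and are filled in locally from the LU's own data.
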
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString)
    ]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   master_capable=True,
                                   vm_capable=True,
                                   offline=False, drained=False,
                                   group=node_group)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True)

    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # If we're being deofflined/drained, we'll MC ourself if needed
    if self.op.drained == False or self.op.offline == False:
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role

    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1

    # compute new flags
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    result = []
    changed_mc = [old_role, new_role].count(self._ROLE_CANDIDATE) == 1

    # Tell the node to demote itself, if no longer MC and not offline
    if (old_role == self._ROLE_CANDIDATE and
        new_role != self._ROLE_OFFLINE and new_role != old_role):
      msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself: %s", msg)

    new_flags = self._R2F[new_role]
    for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
      if of != nf:
        result.append((desc, str(nf)))
    (node.master_candidate, node.drained, node.offline) = new_flags

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


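# Illustrative round-trip of the role mapping defined on LUSetNodeParams
# above (flags are ordered as master_candidate, drained, offline):
#
#   LUSetNodeParams._F2R[(True, False, False)] == LUSetNodeParams._ROLE_CANDIDATE
#   LUSetNodeParams._R2F[LUSetNodeParams._ROLE_OFFLINE] == (False, False, True)
#
# At most one of the three flags may be set to True in a single request,
# which is what CheckArguments enforces before the role is computed in Exec.
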
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
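    # callers that pass force=None (e.g. reinstall and rename) do not
    # get the --force retry hint below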
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running, before
  calling _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  when computing the result.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

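  # a shutdown error only makes the result False when it happens on a
  # node whose errors we may not ignore (any node if ignore_primary is
  # unset, non-primary nodes otherwise)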
  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

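    # soft and hard reboots are delegated to the node's hypervisor; any
    # other reboot type is emulated below as shutdown, disk restart and start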
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

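    # the OS create scripts need access to the instance's disks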
    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams", "custom_hvparams",
                                    "custom_beparams", "custom_nicparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")


  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

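    # live data (and thus node locking) is only needed when at least one
    # non-static field was requested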
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
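    # build one output row per instance, with one entry per requested field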
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "custom_nicparams":
          val = [nic.nicparams for nic in instance.nics]
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "custom_hvparams":
          val = instance.hvparams # not filled!
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "custom_beparams":
          val = instance.beparams
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency or primary_node.offline:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, ht.TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
5530
  """Move an instance by data-copying.
5531

5532
  """
5533
  HPATH = "instance-move"
5534
  HTYPE = constants.HTYPE_INSTANCE
5535
  _OP_PARAMS = [
5536
    _PInstanceName,
5537
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5538
    _PShutdownTimeout,
5539
    ]
5540
  REQ_BGL = False
5541

    
5542
  def ExpandNames(self):
5543
    self._ExpandAndLockInstance()
5544
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5545
    self.op.target_node = target_node
5546
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5547
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5548

    
5549
  def DeclareLocks(self, level):
5550
    if level == locking.LEVEL_NODE:
5551
      self._LockInstancesNodes(primary_only=True)
5552

    
5553
  def BuildHooksEnv(self):
5554
    """Build hooks env.
5555

5556
    This runs on master, primary and secondary nodes of the instance.
5557

5558
    """
5559
    env = {
5560
      "TARGET_NODE": self.op.target_node,
5561
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5562
      }
5563
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5564
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5565
                                       self.op.target_node]
5566
    return env, nl, nl
5567

    
5568
  def CheckPrereq(self):
5569
    """Check prerequisites.
5570

5571
    This checks that the instance is in the cluster.
5572

5573
    """
5574
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5575
    assert self.instance is not None, \
5576
      "Cannot retrieve locked instance %s" % self.op.instance_name
5577

    
5578
    node = self.cfg.GetNodeInfo(self.op.target_node)
5579
    assert node is not None, \
5580
      "Cannot retrieve locked node %s" % self.op.target_node
5581

    
5582
    self.target_node = target_node = node.name
5583

    
5584
    if target_node == instance.primary_node:
5585
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5586
                                 (instance.name, target_node),
5587
                                 errors.ECODE_STATE)
5588

    
5589
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5590

    
5591
    for idx, dsk in enumerate(instance.disks):
5592
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5593
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5594
                                   " cannot copy" % idx, errors.ECODE_STATE)
5595

    
5596
    _CheckNodeOnline(self, target_node)
5597
    _CheckNodeNotDrained(self, target_node)
5598

    
5599
    if instance.admin_up:
5600
      # check memory requirements on the secondary node
5601
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5602
                           instance.name, bep[constants.BE_MEMORY],
5603
                           instance.hypervisor)
5604
    else:
5605
      self.LogInfo("Not checking memory on the secondary node as"
5606
                   " instance will not be started")
5607

    
5608
    # check bridge existance
5609
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5610

    
  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance
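
    # Summary of the normalization below (comment added for clarity): the
    # obsolete boolean 'live' parameter is folded into 'mode' (True becomes
    # HT_MIGRATION_LIVE, False becomes HT_MIGRATION_NONLIVE); if neither is
    # given, the hypervisor's HV_MIGRATION_MODE default is used, and 'live'
    # is reset to None so that repeated CheckPrereq runs stay idempotent.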

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters is accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
6005
    """Migrate an instance.
6006

6007
    The migrate is done by:
6008
      - change the disks into dual-master mode
6009
      - wait until disks are fully synchronized again
6010
      - migrate the instance
6011
      - change disks on the new secondary node (the old primary) to secondary
6012
      - wait until disks are fully synchronized
6013
      - change disks into single-master mode
6014

6015
    """
6016
    instance = self.instance
6017
    target_node = self.target_node
6018
    source_node = self.source_node
6019

    
6020
    self.feedback_fn("* checking disk consistency between source and target")
6021
    for dev in instance.disks:
6022
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6023
        raise errors.OpExecError("Disk %s is degraded or not fully"
6024
                                 " synchronized on target node,"
6025
                                 " aborting migrate." % dev.iv_name)
6026

    
6027
    # First get the migration information from the remote node
6028
    result = self.rpc.call_migration_info(source_node, instance)
6029
    msg = result.fail_msg
6030
    if msg:
6031
      log_err = ("Failed fetching source migration information from %s: %s" %
6032
                 (source_node, msg))
6033
      logging.error(log_err)
6034
      raise errors.OpExecError(log_err)
6035

    
6036
    self.migration_info = migration_info = result.payload
6037

    
6038
    # Then switch the disks to master/master mode
6039
    self._EnsureSecondary(target_node)
6040
    self._GoStandalone()
6041
    self._GoReconnect(True)
6042
    self._WaitUntilSync()
6043

    
6044
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6045
    result = self.rpc.call_accept_instance(target_node,
6046
                                           instance,
6047
                                           migration_info,
6048
                                           self.nodes_ip[target_node])
6049

    
6050
    msg = result.fail_msg
6051
    if msg:
6052
      logging.error("Instance pre-migration failed, trying to revert"
6053
                    " disk status: %s", msg)
6054
      self.feedback_fn("Pre-migration failed, aborting")
6055
      self._AbortMigration()
6056
      self._RevertDiskStatus()
6057
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6058
                               (instance.name, msg))
6059

    
6060
    self.feedback_fn("* migrating instance to %s" % target_node)
6061
    time.sleep(10)
6062
    result = self.rpc.call_instance_migrate(source_node, instance,
6063
                                            self.nodes_ip[target_node],
6064
                                            self.live)
6065
    msg = result.fail_msg
6066
    if msg:
6067
      logging.error("Instance migration failed, trying to revert"
6068
                    " disk status: %s", msg)
6069
      self.feedback_fn("Migration failed, aborting")
6070
      self._AbortMigration()
6071
      self._RevertDiskStatus()
6072
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6073
                               (instance.name, msg))
6074
    time.sleep(10)
6075

    
6076
    instance.primary_node = target_node
6077
    # distribute new instance config to the other nodes
6078
    self.cfg.Update(instance, self.feedback_fn)
6079

    
6080
    result = self.rpc.call_finalize_migration(target_node,
6081
                                              instance,
6082
                                              migration_info,
6083
                                              True)
6084
    msg = result.fail_msg
6085
    if msg:
6086
      logging.error("Instance migration succeeded, but finalization failed:"
6087
                    " %s", msg)
6088
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6089
                               msg)
6090

    
6091
    self._EnsureSecondary(source_node)
6092
    self._WaitUntilSync()
6093
    self._GoStandalone()
6094
    self._GoReconnect(False)
6095
    self._WaitUntilSync()
6096

    
6097
    self.feedback_fn("* done")
6098

    
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
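    # Note (added): all_nodes and nodes_ip gathered here are what the
    # drbd_* helpers above (_GoStandalone, _GoReconnect, _WaitUntilSync)
    # use to address the source and target node via their secondary IPs.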

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a logical volume name for each of the given
  extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
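
# Illustrative behaviour (comment added, not upstream): for
# exts == [".disk0", ".disk1"] this returns something like
# ["<unique-id>.disk0", "<unique-id>.disk1"], where <unique-id> is a fresh
# identifier obtained from the cluster configuration.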


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
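
# Resulting layout (added summary of the code above): one LD_DRBD8 disk whose
# logical_id packs (primary, secondary, port, p_minor, s_minor, shared_secret)
# and whose two LD_LV children are the data volume (names[0], full size) and
# the 128 MB metadata volume (names[1]).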


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
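    # Layout note (added): after this loop, names holds
    # [disk0_data, disk0_meta, disk1_data, disk1_meta, ...] while minors
    # alternates primary/secondary per disk, so names[idx*2:idx*2+2] and
    # minors[idx*2], minors[idx*2+1] below select the pair for disk idx.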
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
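
# Worked example (added): if 256 MiB out of 1024 MiB were written in 30
# seconds, _CalcEta(30, 256, 1024) == (1024 - 256) * (30 / 256.0) == 90.0,
# i.e. about 90 seconds remaining at the observed average rate.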


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  for idx, device in enumerate(instance.disks):
    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s", idx, instance.name)

    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)
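    # Example (added; the exact figures depend on the cluster constants):
    # assuming MIN_WIPE_CHUNK_PERCENT == 10, a 100 GiB disk would be wiped
    # in 10 GiB chunks, unless that exceeds MAX_WIPE_CHUNK, which then caps
    # the chunk size.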

    offset = 0
    size = device.size
    last_output = 0
    start_time = time.time()

    while offset < size:
      wipe_size = min(wipe_chunk_size, size - offset)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))
      now = time.time()
      offset += wipe_size
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
        last_output = now


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
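      # On the primary node (f_create is True) creation is forced and the
      # device is force-opened; on the other nodes _CreateBlockDev only
      # creates device types that declare CreateOnSecondary().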
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
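
# Worked example (added): for two DRBD8 disks of 1024 and 2048,
# _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
# returns (1024 + 128) + (2048 + 128) == 3328, while DT_DISKLESS and DT_FILE
# need no volume group space and yield None.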


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, ht.TBool),
    ("wait_for_sync", True, ht.TBool),
    ("ip_check", True, ht.TBool),
    ("name_check", True, ht.TBool),
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("osparams", ht.EmptyDict, ht.TDict),
    ("no_install", None, ht.TMaybeBool),
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
    ("source_x509_ca", None, ht.TMaybeString),
    ("source_instance_name", None, ht.TMaybeString),
    ("src_node", None, ht.TMaybeString),
    ("src_path", None, ht.TMaybeString),
    ("pnode", None, ht.TMaybeString),
    ("snode", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("hypervisor", None, ht.TMaybeString),
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, ht.TBool),
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
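    # Illustrative disk specifications (added; the "adopt" value is assumed
    # here to name an existing volume to take over rather than create):
    #   disks=[{"size": 1024}, {"size": 2048}]      creates fresh disks
    #   disks=[{"size": 1024, "adopt": "some_lv"}]  adopts existing storage
    # Mixing adopted and non-adopted disks, adoption with an iallocator, and
    # adoption during an import are all rejected by the checks above.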

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

6974
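  # CheckPrereq below performs all remaining master-side validation for the
  # new instance: merging export parameters (import mode), syntax-checking the
  # hypervisor/backend/OS parameter dictionaries, building the NIC and disk
  # specifications, optionally running the iallocator, and finally verifying
  # the chosen nodes (online/drained state, free disk and memory, OS support).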
  def CheckPrereq(self):
6975
    """Check prerequisites.
6976

6977
    """
6978
    if self.op.mode == constants.INSTANCE_IMPORT:
6979
      export_info = self._ReadExportInfo()
6980
      self._ReadExportParams(export_info)
6981

    
6982
    _CheckDiskTemplate(self.op.disk_template)
6983

    
6984
    if (not self.cfg.GetVGName() and
6985
        self.op.disk_template not in constants.DTS_NOT_LVM):
6986
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6987
                                 " instances", errors.ECODE_STATE)
6988

    
6989
    if self.op.hypervisor is None:
6990
      self.op.hypervisor = self.cfg.GetHypervisorType()
6991

    
6992
    cluster = self.cfg.GetClusterInfo()
6993
    enabled_hvs = cluster.enabled_hypervisors
6994
    if self.op.hypervisor not in enabled_hvs:
6995
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6996
                                 " cluster (%s)" % (self.op.hypervisor,
6997
                                  ",".join(enabled_hvs)),
6998
                                 errors.ECODE_STATE)
6999

    
7000
    # check hypervisor parameter syntax (locally)
7001
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7002
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7003
                                      self.op.hvparams)
7004
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7005
    hv_type.CheckParameterSyntax(filled_hvp)
7006
    self.hv_full = filled_hvp
7007
    # check that we don't specify global parameters on an instance
7008
    _CheckGlobalHvParams(self.op.hvparams)
7009

    
7010
    # fill and remember the beparams dict
7011
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7012
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7013

    
7014
    # build os parameters
7015
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7016

    
7017
    # now that hvp/bep are in final format, let's reset to defaults,
7018
    # if told to do so
7019
    if self.op.identify_defaults:
7020
      self._RevertToDefaults(cluster)
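    # Each entry of self.op.nics is a dict that may carry "mode", "ip", "mac"
    # and either "link" or "bridge", e.g. {"mode": "bridged", "link": "xen-br0"}
    # (example values only); missing values fall back to the cluster-level
    # nicparams defaults.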
    # NIC buildup
7023
    self.nics = []
7024
    for idx, nic in enumerate(self.op.nics):
7025
      nic_mode_req = nic.get("mode", None)
7026
      nic_mode = nic_mode_req
7027
      if nic_mode is None:
7028
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7029

    
7030
      # in routed mode, for the first nic, the default ip is 'auto'
7031
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7032
        default_ip_mode = constants.VALUE_AUTO
7033
      else:
7034
        default_ip_mode = constants.VALUE_NONE
7035

    
7036
      # ip validity checks
7037
      ip = nic.get("ip", default_ip_mode)
7038
      if ip is None or ip.lower() == constants.VALUE_NONE:
7039
        nic_ip = None
7040
      elif ip.lower() == constants.VALUE_AUTO:
7041
        if not self.op.name_check:
7042
          raise errors.OpPrereqError("IP address set to auto but name checks"
7043
                                     " have been skipped",
7044
                                     errors.ECODE_INVAL)
7045
        nic_ip = self.hostname1.ip
7046
      else:
7047
        if not netutils.IPAddress.IsValid(ip):
7048
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7049
                                     errors.ECODE_INVAL)
7050
        nic_ip = ip
7051

    
7052
      # TODO: check the ip address for uniqueness
7053
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7054
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7055
                                   errors.ECODE_INVAL)
7056

    
7057
      # MAC address verification
7058
      mac = nic.get("mac", constants.VALUE_AUTO)
7059
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7060
        mac = utils.NormalizeAndValidateMac(mac)
7061

    
7062
        try:
7063
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7064
        except errors.ReservationError:
7065
          raise errors.OpPrereqError("MAC address %s already in use"
7066
                                     " in cluster" % mac,
7067
                                     errors.ECODE_NOTUNIQUE)
7068

    
7069
      # bridge verification
7070
      bridge = nic.get("bridge", None)
7071
      link = nic.get("link", None)
7072
      if bridge and link:
7073
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7074
                                   " at the same time", errors.ECODE_INVAL)
7075
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7076
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7077
                                   errors.ECODE_INVAL)
7078
      elif bridge:
7079
        link = bridge
7080

    
7081
      nicparams = {}
7082
      if nic_mode_req:
7083
        nicparams[constants.NIC_MODE] = nic_mode_req
7084
      if link:
7085
        nicparams[constants.NIC_LINK] = link
7086

    
7087
      check_params = cluster.SimpleFillNIC(nicparams)
7088
      objects.NIC.CheckParameterSyntax(check_params)
7089
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
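    # Each entry of self.op.disks must provide at least "size"; "mode" defaults
    # to read-write and an optional "adopt" key names an existing logical
    # volume to take over instead of creating new storage.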
    # disk checks/pre-build
7092
    self.disks = []
7093
    for disk in self.op.disks:
7094
      mode = disk.get("mode", constants.DISK_RDWR)
7095
      if mode not in constants.DISK_ACCESS_SET:
7096
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7097
                                   mode, errors.ECODE_INVAL)
7098
      size = disk.get("size", None)
7099
      if size is None:
7100
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7101
      try:
7102
        size = int(size)
7103
      except (TypeError, ValueError):
7104
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7105
                                   errors.ECODE_INVAL)
7106
      new_disk = {"size": size, "mode": mode}
7107
      if "adopt" in disk:
7108
        new_disk["adopt"] = disk["adopt"]
7109
      self.disks.append(new_disk)
7110

    
7111
    if self.op.mode == constants.INSTANCE_IMPORT:
7112

    
7113
      # Check that the new instance doesn't have less disks than the export
7114
      instance_disks = len(self.disks)
7115
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7116
      if instance_disks < export_disks:
7117
        raise errors.OpPrereqError("Not enough disks to import."
7118
                                   " (instance: %d, export: %d)" %
7119
                                   (instance_disks, export_disks),
7120
                                   errors.ECODE_INVAL)
7121

    
7122
      disk_images = []
7123
      for idx in range(export_disks):
7124
        option = 'disk%d_dump' % idx
7125
        if export_info.has_option(constants.INISECT_INS, option):
7126
          # FIXME: are the old os-es, disk sizes, etc. useful?
7127
          export_name = export_info.get(constants.INISECT_INS, option)
7128
          image = utils.PathJoin(self.op.src_path, export_name)
7129
          disk_images.append(image)
7130
        else:
7131
          disk_images.append(False)
7132

    
7133
      self.src_images = disk_images
7134

    
7135
      old_name = export_info.get(constants.INISECT_INS, 'name')
7136
      try:
7137
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7138
      except (TypeError, ValueError), err:
7139
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7140
                                   " an integer: %s" % str(err),
7141
                                   errors.ECODE_STATE)
7142
      if self.op.instance_name == old_name:
7143
        for idx, nic in enumerate(self.nics):
7144
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7145
            nic_mac_ini = 'nic%d_mac' % idx
7146
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7147

    
7148
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7149

    
7150
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7151
    if self.op.ip_check:
7152
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7153
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7154
                                   (self.check_ip, self.op.instance_name),
7155
                                   errors.ECODE_NOTUNIQUE)
7156

    
7157
    #### mac address generation
7158
    # By generating here the mac address both the allocator and the hooks get
7159
    # the real final mac address rather than the 'auto' or 'generate' value.
7160
    # There is a race condition between the generation and the instance object
7161
    # creation, which means that we know the mac is valid now, but we're not
7162
    # sure it will be when we actually add the instance. If things go bad
7163
    # adding the instance will abort because of a duplicate mac, and the
7164
    # creation job will fail.
7165
    for nic in self.nics:
7166
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7167
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7168

    
7169
    #### allocator run
7170

    
7171
    if self.op.iallocator is not None:
7172
      self._RunAllocator()
7173

    
7174
    #### node related checks
7175

    
7176
    # check primary node
7177
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7178
    assert self.pnode is not None, \
7179
      "Cannot retrieve locked node %s" % self.op.pnode
7180
    if pnode.offline:
7181
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7182
                                 pnode.name, errors.ECODE_STATE)
7183
    if pnode.drained:
7184
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7185
                                 pnode.name, errors.ECODE_STATE)
7186

    
7187
    self.secondaries = []
7188

    
7189
    # mirror node verification
7190
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7191
      if self.op.snode == pnode.name:
7192
        raise errors.OpPrereqError("The secondary node cannot be the"
7193
                                   " primary node.", errors.ECODE_INVAL)
7194
      _CheckNodeOnline(self, self.op.snode)
7195
      _CheckNodeNotDrained(self, self.op.snode)
7196
      self.secondaries.append(self.op.snode)
7197

    
7198
    nodenames = [pnode.name] + self.secondaries
7199

    
7200
    req_size = _ComputeDiskSize(self.op.disk_template,
7201
                                self.disks)
7202

    
7203
    # Check lv size requirements, if not adopting
7204
    if req_size is not None and not self.adopt_disks:
7205
      _CheckNodesFreeDisk(self, nodenames, req_size)
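    # For adoption each entry in self.op.disks carries an "adopt" key naming an
    # existing logical volume, e.g. {"size": 10240, "adopt": "data-vol1"}
    # (example values only); the checks below reserve those LVs, make sure they
    # exist in the primary node's volume group and are not online, and then
    # take over their real sizes.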
    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the disk sizes based on what was found on the node
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7238

    
7239
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7240
    # check OS parameters (remotely)
7241
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7242

    
7243
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7244

    
7245
    # memory check on primary node
7246
    if self.op.start:
7247
      _CheckNodeFreeMemory(self, self.pnode.name,
7248
                           "creating instance %s" % self.op.instance_name,
7249
                           self.be_full[constants.BE_MEMORY],
7250
                           self.op.hypervisor)
7251

    
7252
    self.dry_run_result = list(nodenames)
7253

    
7254
  def Exec(self, feedback_fn):
7255
    """Create and add the instance to the cluster.
7256

7257
    """
7258
    instance = self.op.instance_name
7259
    pnode_name = self.pnode.name
7260

    
7261
    ht_kind = self.op.hypervisor
7262
    if ht_kind in constants.HTS_REQ_PORT:
7263
      network_port = self.cfg.AllocatePort()
7264
    else:
7265
      network_port = None
7266

    
7267
    if constants.ENABLE_FILE_STORAGE:
7268
      # this is needed because os.path.join does not accept None arguments
7269
      if self.op.file_storage_dir is None:
7270
        string_file_storage_dir = ""
7271
      else:
7272
        string_file_storage_dir = self.op.file_storage_dir
7273

    
7274
      # build the full file storage dir path
7275
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7276
                                        string_file_storage_dir, instance)
7277
    else:
7278
      file_storage_dir = ""
7279

    
7280
    disks = _GenerateDiskTemplate(self,
7281
                                  self.op.disk_template,
7282
                                  instance, pnode_name,
7283
                                  self.secondaries,
7284
                                  self.disks,
7285
                                  file_storage_dir,
7286
                                  self.op.file_driver,
7287
                                  0)
7288

    
7289
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7290
                            primary_node=pnode_name,
7291
                            nics=self.nics, disks=disks,
7292
                            disk_template=self.op.disk_template,
7293
                            admin_up=False,
7294
                            network_port=network_port,
7295
                            beparams=self.op.beparams,
7296
                            hvparams=self.op.hvparams,
7297
                            hypervisor=self.op.hypervisor,
7298
                            osparams=self.op.osparams,
7299
                            )
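    # When adopting, the volumes named in "adopt" are renamed on the primary
    # node to the freshly generated LV names computed by _GenerateDiskTemplate
    # above, instead of creating new storage from scratch.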
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
7314
      feedback_fn("* creating instance disks...")
7315
      try:
7316
        _CreateDisks(self, iobj)
7317
      except errors.OpExecError:
7318
        self.LogWarning("Device creation failed, reverting...")
7319
        try:
7320
          _RemoveDisks(self, iobj)
7321
        finally:
7322
          self.cfg.ReleaseDRBDMinors(instance)
7323
          raise
7324

    
7325
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7326
        feedback_fn("* wiping instance disks...")
7327
        try:
7328
          _WipeDisks(self, iobj)
7329
        except errors.OpExecError:
7330
          self.LogWarning("Device wiping failed, reverting...")
7331
          try:
7332
            _RemoveDisks(self, iobj)
7333
          finally:
7334
            self.cfg.ReleaseDRBDMinors(instance)
7335
            raise
7336

    
7337
    feedback_fn("adding instance %s to cluster config" % instance)
7338

    
7339
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7340

    
7341
    # Declare that we don't want to remove the instance lock anymore, as we've
7342
    # added the instance to the config
7343
    del self.remove_locks[locking.LEVEL_INSTANCE]
7344
    # Unlock all the nodes
7345
    if self.op.mode == constants.INSTANCE_IMPORT:
7346
      nodes_keep = [self.op.src_node]
7347
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7348
                       if node != self.op.src_node]
7349
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7350
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7351
    else:
7352
      self.context.glm.release(locking.LEVEL_NODE)
7353
      del self.acquired_locks[locking.LEVEL_NODE]
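    # Three sync policies: wait for a full sync if requested, otherwise for
    # mirrored templates only make sure the disks are not degraded (a one-shot
    # check after a short grace period), and for everything else skip the
    # check entirely.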
    if self.op.wait_for_sync:
7356
      disk_abort = not _WaitForSync(self, iobj)
7357
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7358
      # make sure the disks are not degraded (still sync-ing is ok)
7359
      time.sleep(15)
7360
      feedback_fn("* checking mirrors status")
7361
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7362
    else:
7363
      disk_abort = False
7364

    
7365
    if disk_abort:
7366
      _RemoveDisks(self, iobj)
7367
      self.cfg.RemoveInstance(iobj.name)
7368
      # Make sure the instance lock gets removed
7369
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7370
      raise errors.OpExecError("There are some degraded disks for"
7371
                               " this instance")
7372

    
7373
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7374
      if self.op.mode == constants.INSTANCE_CREATE:
7375
        if not self.op.no_install:
7376
          feedback_fn("* running the instance OS create scripts...")
7377
          # FIXME: pass debug option from opcode to backend
7378
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7379
                                                 self.op.debug_level)
7380
          result.Raise("Could not add os for instance %s"
7381
                       " on node %s" % (instance, pnode_name))
7382

    
7383
      elif self.op.mode == constants.INSTANCE_IMPORT:
7384
        feedback_fn("* running the instance OS import scripts...")
7385

    
7386
        transfers = []
7387

    
7388
        for idx, image in enumerate(self.src_images):
7389
          if not image:
7390
            continue
7391

    
7392
          # FIXME: pass debug option from opcode to backend
7393
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7394
                                             constants.IEIO_FILE, (image, ),
7395
                                             constants.IEIO_SCRIPT,
7396
                                             (iobj.disks[idx], idx),
7397
                                             None)
7398
          transfers.append(dt)
7399

    
7400
        import_result = \
7401
          masterd.instance.TransferInstanceData(self, feedback_fn,
7402
                                                self.op.src_node, pnode_name,
7403
                                                self.pnode.secondary_ip,
7404
                                                iobj, transfers)
7405
        if not compat.all(import_result):
7406
          self.LogWarning("Some disks for instance %s on node %s were not"
7407
                          " imported successfully" % (instance, pnode_name))
7408

    
7409
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7410
        feedback_fn("* preparing remote import...")
7411
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7412
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7413

    
7414
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7415
                                                     self.source_x509_ca,
7416
                                                     self._cds, timeouts)
7417
        if not compat.all(disk_results):
7418
          # TODO: Should the instance still be started, even if some disks
7419
          # failed to import (valid for local imports, too)?
7420
          self.LogWarning("Some disks for instance %s on node %s were not"
7421
                          " imported successfully" % (instance, pnode_name))
7422

    
7423
        # Run rename script on newly imported instance
7424
        assert iobj.name == instance
7425
        feedback_fn("Running rename script for %s" % instance)
7426
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7427
                                                   self.source_instance_name,
7428
                                                   self.op.debug_level)
7429
        if result.fail_msg:
7430
          self.LogWarning("Failed to run rename script for %s on node"
7431
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7432

    
7433
      else:
7434
        # also checked in the prereq part
7435
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7436
                                     % self.op.mode)
7437

    
7438
    if self.op.start:
7439
      iobj.admin_up = True
7440
      self.cfg.Update(iobj, feedback_fn)
7441
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7442
      feedback_fn("* starting instance...")
7443
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7444
      result.Raise("Could not start instance")
7445

    
7446
    return list(iobj.all_nodes)


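# Note: the console LU below does not touch the instance itself; it only
# assembles and returns the argv of an ssh command (via self.ssh.BuildCmd)
# that the caller runs on the master node, e.g. roughly
# ["ssh", ..., "root@node1.example.com", "<hypervisor console command>"]
# (illustrative shape only; the exact arguments come from ssh.BuildCmd).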
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
7526
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7527
                                  self.op.iallocator)
7528

    
7529
  def ExpandNames(self):
7530
    self._ExpandAndLockInstance()
7531

    
7532
    if self.op.iallocator is not None:
7533
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7534

    
7535
    elif self.op.remote_node is not None:
7536
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7537
      self.op.remote_node = remote_node
7538

    
7539
      # Warning: do not remove the locking of the new secondary here
7540
      # unless DRBD8.AddChildren is changed to work in parallel;
7541
      # currently it doesn't since parallel invocations of
7542
      # FindUnusedMinor will conflict
7543
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7544
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7545

    
7546
    else:
7547
      self.needed_locks[locking.LEVEL_NODE] = []
7548
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7549

    
7550
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7551
                                   self.op.iallocator, self.op.remote_node,
7552
                                   self.op.disks, False, self.op.early_release)
7553

    
7554
    self.tasklets = [self.replacer]
7555

    
7556
  def DeclareLocks(self, level):
7557
    # If we're not already locking all nodes in the set we have to declare the
7558
    # instance's primary/secondary nodes.
7559
    if (level == locking.LEVEL_NODE and
7560
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7561
      self._LockInstancesNodes()
7562

    
7563
  def BuildHooksEnv(self):
7564
    """Build hooks env.
7565

7566
    This runs on the master, the primary and all the secondaries.
7567

7568
    """
7569
    instance = self.replacer.instance
7570
    env = {
7571
      "MODE": self.op.mode,
7572
      "NEW_SECONDARY": self.op.remote_node,
7573
      "OLD_SECONDARY": instance.secondary_nodes[0],
7574
      }
7575
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7576
    nl = [
7577
      self.cfg.GetMasterNode(),
7578
      instance.primary_node,
7579
      ]
7580
    if self.op.remote_node is not None:
7581
      nl.append(self.op.remote_node)
7582
    return env, nl, nl


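# The actual replacement work is done by the tasklet below; LUReplaceDisks
# above only gathers locks and parameters, instantiates TLReplaceDisks and
# hands it to the generic tasklet machinery (self.tasklets = [self.replacer]).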
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
7616
  def CheckArguments(mode, remote_node, iallocator):
7617
    """Helper function for users of this class.
7618

7619
    """
7620
    # check for valid parameter combination
7621
    if mode == constants.REPLACE_DISK_CHG:
7622
      if remote_node is None and iallocator is None:
7623
        raise errors.OpPrereqError("When changing the secondary either an"
7624
                                   " iallocator script must be used or the"
7625
                                   " new node given", errors.ECODE_INVAL)
7626

    
7627
      if remote_node is not None and iallocator is not None:
7628
        raise errors.OpPrereqError("Give either the iallocator or the new"
7629
                                   " secondary, not both", errors.ECODE_INVAL)
7630

    
7631
    elif remote_node is not None or iallocator is not None:
7632
      # Not replacing the secondary
7633
      raise errors.OpPrereqError("The iallocator and new node options can"
7634
                                 " only be used when changing the"
7635
                                 " secondary node", errors.ECODE_INVAL)
7636

    
7637
  @staticmethod
7638
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7639
    """Compute a new secondary node using an IAllocator.
7640

7641
    """
7642
    ial = IAllocator(lu.cfg, lu.rpc,
7643
                     mode=constants.IALLOCATOR_MODE_RELOC,
7644
                     name=instance_name,
7645
                     relocate_from=relocate_from)
7646

    
7647
    ial.Run(iallocator_name)
7648

    
7649
    if not ial.success:
7650
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7651
                                 " %s" % (iallocator_name, ial.info),
7652
                                 errors.ECODE_NORES)
7653

    
7654
    if len(ial.result) != ial.required_nodes:
7655
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7656
                                 " of nodes (%s), required %s" %
7657
                                 (iallocator_name,
7658
                                  len(ial.result), ial.required_nodes),
7659
                                 errors.ECODE_FAULT)
7660

    
7661
    remote_node_name = ial.result[0]
7662

    
7663
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7664
               instance_name, remote_node_name)
7665

    
7666
    return remote_node_name
7667

    
7668
  def _FindFaultyDisks(self, node_name):
7669
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7670
                                    node_name, True)
7671

    
7672
  def CheckPrereq(self):
7673
    """Check prerequisites.
7674

7675
    This checks that the instance is in the cluster.
7676

7677
    """
7678
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7679
    assert instance is not None, \
7680
      "Cannot retrieve locked instance %s" % self.instance_name
7681

    
7682
    if instance.disk_template != constants.DT_DRBD8:
7683
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7684
                                 " instances", errors.ECODE_INVAL)
7685

    
7686
    if len(instance.secondary_nodes) != 1:
7687
      raise errors.OpPrereqError("The instance has a strange layout,"
7688
                                 " expected one secondary but found %d" %
7689
                                 len(instance.secondary_nodes),
7690
                                 errors.ECODE_FAULT)
7691

    
7692
    if not self.delay_iallocator:
7693
      self._CheckPrereq2()
7694

    
7695
  def _CheckPrereq2(self):
7696
    """Check prerequisites, second part.
7697

7698
    This function should always be part of CheckPrereq. It was separated and is
7699
    now called from Exec because during node evacuation iallocator was only
7700
    called with an unmodified cluster model, not taking planned changes into
7701
    account.
7702

7703
    """
7704
    instance = self.instance
7705
    secondary_node = instance.secondary_nodes[0]
7706

    
7707
    if self.iallocator_name is None:
7708
      remote_node = self.remote_node
7709
    else:
7710
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7711
                                       instance.name, instance.secondary_nodes)
7712

    
7713
    if remote_node is not None:
7714
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7715
      assert self.remote_node_info is not None, \
7716
        "Cannot retrieve locked node %s" % remote_node
7717
    else:
7718
      self.remote_node_info = None
7719

    
7720
    if remote_node == self.instance.primary_node:
7721
      raise errors.OpPrereqError("The specified node is the primary node of"
7722
                                 " the instance.", errors.ECODE_INVAL)
7723

    
7724
    if remote_node == secondary_node:
7725
      raise errors.OpPrereqError("The specified node is already the"
7726
                                 " secondary node of the instance.",
7727
                                 errors.ECODE_INVAL)
7728

    
7729
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7730
                                    constants.REPLACE_DISK_CHG):
7731
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7732
                                 errors.ECODE_INVAL)
7733

    
7734
    if self.mode == constants.REPLACE_DISK_AUTO:
7735
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7736
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7737

    
7738
      if faulty_primary and faulty_secondary:
7739
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7740
                                   " one node and can not be repaired"
7741
                                   " automatically" % self.instance_name,
7742
                                   errors.ECODE_STATE)
7743

    
7744
      if faulty_primary:
7745
        self.disks = faulty_primary
7746
        self.target_node = instance.primary_node
7747
        self.other_node = secondary_node
7748
        check_nodes = [self.target_node, self.other_node]
7749
      elif faulty_secondary:
7750
        self.disks = faulty_secondary
7751
        self.target_node = secondary_node
7752
        self.other_node = instance.primary_node
7753
        check_nodes = [self.target_node, self.other_node]
7754
      else:
7755
        self.disks = []
7756
        check_nodes = []
7757

    
7758
    else:
7759
      # Non-automatic modes
7760
      if self.mode == constants.REPLACE_DISK_PRI:
7761
        self.target_node = instance.primary_node
7762
        self.other_node = secondary_node
7763
        check_nodes = [self.target_node, self.other_node]
7764

    
7765
      elif self.mode == constants.REPLACE_DISK_SEC:
7766
        self.target_node = secondary_node
7767
        self.other_node = instance.primary_node
7768
        check_nodes = [self.target_node, self.other_node]
7769

    
7770
      elif self.mode == constants.REPLACE_DISK_CHG:
7771
        self.new_node = remote_node
7772
        self.other_node = instance.primary_node
7773
        self.target_node = secondary_node
7774
        check_nodes = [self.new_node, self.other_node]
7775

    
7776
        _CheckNodeNotDrained(self.lu, remote_node)
7777

    
7778
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7779
        assert old_node_info is not None
7780
        if old_node_info.offline and not self.early_release:
7781
          # doesn't make sense to delay the release
7782
          self.early_release = True
7783
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7784
                          " early-release mode", secondary_node)
7785

    
7786
      else:
7787
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7788
                                     self.mode)
7789

    
7790
      # If not specified all disks should be replaced
7791
      if not self.disks:
7792
        self.disks = range(len(self.instance.disks))
7793

    
7794
    for node in check_nodes:
7795
      _CheckNodeOnline(self.lu, node)
7796

    
7797
    # Check whether disks are valid
7798
    for disk_idx in self.disks:
7799
      instance.FindDisk(disk_idx)
7800

    
7801
    # Get secondary node IP addresses
7802
    node_2nd_ip = {}
7803

    
7804
    for node_name in [self.target_node, self.other_node, self.new_node]:
7805
      if node_name is not None:
7806
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7807

    
7808
    self.node_secondary_ip = node_2nd_ip
7809

    
7810
  def Exec(self, feedback_fn):
7811
    """Execute disk replacement.
7812

7813
    This dispatches the disk replacement to the appropriate handler.
7814

7815
    """
7816
    if self.delay_iallocator:
7817
      self._CheckPrereq2()
7818

    
7819
    if not self.disks:
7820
      feedback_fn("No disks need replacement")
7821
      return
7822

    
7823
    feedback_fn("Replacing disk(s) %s for %s" %
7824
                (utils.CommaJoin(self.disks), self.instance.name))
7825

    
7826
    activate_disks = (not self.instance.admin_up)
7827

    
7828
    # Activate the instance disks if we're replacing them on a down instance
7829
    if activate_disks:
7830
      _StartInstanceDisks(self.lu, self.instance, True)
7831

    
7832
    try:
7833
      # Should we replace the secondary node?
7834
      if self.new_node is not None:
7835
        fn = self._ExecDrbd8Secondary
7836
      else:
7837
        fn = self._ExecDrbd8DiskOnly
7838

    
7839
      return fn(feedback_fn)
7840

    
7841
    finally:
7842
      # Deactivate the instance disks if we're replacing them on a
7843
      # down instance
7844
      if activate_disks:
7845
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7846

    
7847
  def _CheckVolumeGroup(self, nodes):
7848
    self.lu.LogInfo("Checking volume groups")
7849

    
7850
    vgname = self.cfg.GetVGName()
7851

    
7852
    # Make sure volume group exists on all involved nodes
7853
    results = self.rpc.call_vg_list(nodes)
7854
    if not results:
7855
      raise errors.OpExecError("Can't list volume groups on the nodes")
7856

    
7857
    for node in nodes:
7858
      res = results[node]
7859
      res.Raise("Error checking node %s" % node)
7860
      if vgname not in res.payload:
7861
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7862
                                 (vgname, node))
7863

    
7864
  def _CheckDisksExistence(self, nodes):
7865
    # Check disk existence
7866
    for idx, dev in enumerate(self.instance.disks):
7867
      if idx not in self.disks:
7868
        continue
7869

    
7870
      for node in nodes:
7871
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7872
        self.cfg.SetDiskID(dev, node)
7873

    
7874
        result = self.rpc.call_blockdev_find(node, dev)
7875

    
7876
        msg = result.fail_msg
7877
        if msg or not result.payload:
7878
          if not msg:
7879
            msg = "disk not found"
7880
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7881
                                   (idx, node, msg))
7882

    
7883
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7884
    for idx, dev in enumerate(self.instance.disks):
7885
      if idx not in self.disks:
7886
        continue
7887

    
7888
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7889
                      (idx, node_name))
7890

    
7891
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7892
                                   ldisk=ldisk):
7893
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7894
                                 " replace disks for instance %s" %
7895
                                 (node_name, self.instance.name))
7896

    
7897
  def _CreateNewStorage(self, node_name):
7898
    vgname = self.cfg.GetVGName()
7899
    iv_names = {}
7900

    
7901
    for idx, dev in enumerate(self.instance.disks):
7902
      if idx not in self.disks:
7903
        continue
7904

    
7905
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7906

    
7907
      self.cfg.SetDiskID(dev, node_name)
7908

    
7909
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7910
      names = _GenerateUniqueNames(self.lu, lv_names)
7911

    
7912
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7913
                             logical_id=(vgname, names[0]))
7914
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7915
                             logical_id=(vgname, names[1]))
7916

    
7917
      new_lvs = [lv_data, lv_meta]
7918
      old_lvs = dev.children
7919
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7920

    
7921
      # we pass force_create=True to force the LVM creation
7922
      for new_lv in new_lvs:
7923
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7924
                        _GetInstanceInfoText(self.instance), False)
7925

    
7926
    return iv_names
7927

    
7928
  def _CheckDevices(self, node_name, iv_names):
7929
    for name, (dev, _, _) in iv_names.iteritems():
7930
      self.cfg.SetDiskID(dev, node_name)
7931

    
7932
      result = self.rpc.call_blockdev_find(node_name, dev)
7933

    
7934
      msg = result.fail_msg
7935
      if msg or not result.payload:
7936
        if not msg:
7937
          msg = "disk not found"
7938
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7939
                                 (name, msg))
7940

    
7941
      if result.payload.is_degraded:
7942
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7943

    
7944
  def _RemoveOldStorage(self, node_name, iv_names):
7945
    for name, (_, old_lvs, _) in iv_names.iteritems():
7946
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7947

    
7948
      for lv in old_lvs:
7949
        self.cfg.SetDiskID(lv, node_name)
7950

    
7951
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7952
        if msg:
7953
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7954
                             hint="remove unused LVs manually")
7955

    
7956
  def _ReleaseNodeLock(self, node_name):
7957
    """Releases the lock for a given node."""
7958
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7959

    
7960
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7961
    """Replace a disk on the primary or secondary for DRBD 8.
7962

7963
    The algorithm for replace is quite complicated:
7964

7965
      1. for each disk to be replaced:
7966

7967
        1. create new LVs on the target node with unique names
7968
        1. detach old LVs from the drbd device
7969
        1. rename old LVs to name_replaced.<time_t>
7970
        1. rename new LVs to old LVs
7971
        1. attach the new LVs (with the old names now) to the drbd device
7972

7973
      1. wait for sync across all devices
7974

7975
      1. for each modified disk:
7976

7977
        1. remove old LVs (which have the name name_replaces.<time_t>)
7978

7979
    Failures are not very well handled.
7980

7981
    """
7982
    steps_total = 6
7983

    
7984
    # Step: check device activation
7985
    self.lu.LogStep(1, steps_total, "Check device existence")
7986
    self._CheckDisksExistence([self.other_node, self.target_node])
7987
    self._CheckVolumeGroup([self.target_node, self.other_node])
7988

    
7989
    # Step: check other node consistency
7990
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7991
    self._CheckDisksConsistency(self.other_node,
7992
                                self.other_node == self.instance.primary_node,
7993
                                False)
7994

    
7995
    # Step: create new storage
7996
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7997
    iv_names = self._CreateNewStorage(self.target_node)
7998

    
7999
    # Step: for each lv, detach+rename*2+attach
8000
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8001
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8002
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8003

    
8004
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8005
                                                     old_lvs)
8006
      result.Raise("Can't detach drbd from local storage on node"
8007
                   " %s for device %s" % (self.target_node, dev.iv_name))
8008
      #dev.children = []
8009
      #cfg.Update(instance)
8010

    
8011
      # ok, we created the new LVs, so now we know we have the needed
8012
      # storage; as such, we proceed on the target node to rename
8013
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8014
      # using the assumption that logical_id == physical_id (which in
8015
      # turn is the unique_id on that node)
8016

    
8017
      # FIXME(iustin): use a better name for the replaced LVs
8018
      temp_suffix = int(time.time())
8019
      ren_fn = lambda d, suff: (d.physical_id[0],
8020
                                d.physical_id[1] + "_replaced-%s" % suff)
8021

    
8022
      # Build the rename list based on what LVs exist on the node
8023
      rename_old_to_new = []
8024
      for to_ren in old_lvs:
8025
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8026
        if not result.fail_msg and result.payload:
8027
          # device exists
8028
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8029

    
8030
      self.lu.LogInfo("Renaming the old LVs on the target node")
8031
      result = self.rpc.call_blockdev_rename(self.target_node,
8032
                                             rename_old_to_new)
8033
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8034

    
8035
      # Now we rename the new LVs to the old LVs
8036
      self.lu.LogInfo("Renaming the new LVs on the target node")
8037
      rename_new_to_old = [(new, old.physical_id)
8038
                           for old, new in zip(old_lvs, new_lvs)]
8039
      result = self.rpc.call_blockdev_rename(self.target_node,
8040
                                             rename_new_to_old)
8041
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8042

    
8043
      for old, new in zip(old_lvs, new_lvs):
8044
        new.logical_id = old.logical_id
8045
        self.cfg.SetDiskID(new, self.target_node)
8046

    
8047
      for disk in old_lvs:
8048
        disk.logical_id = ren_fn(disk, temp_suffix)
8049
        self.cfg.SetDiskID(disk, self.target_node)
8050

    
8051
      # Now that the new lvs have the old name, we can add them to the device
8052
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8053
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8054
                                                  new_lvs)
8055
      msg = result.fail_msg
8056
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    
8066
      dev.children = new_lvs
8067

    
8068
      self.cfg.Update(self.instance, feedback_fn)
8069

    
8070
    cstep = 5
8071
    if self.early_release:
8072
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8073
      cstep += 1
8074
      self._RemoveOldStorage(self.target_node, iv_names)
8075
      # WARNING: we release both node locks here, do not do other RPCs
8076
      # than WaitForSync to the primary node
8077
      self._ReleaseNodeLock([self.target_node, self.other_node])
8078

    
8079
    # Wait for sync
8080
    # This can fail as the old devices are degraded and _WaitForSync
8081
    # does a combined result over all disks, so we don't check its return value
8082
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8083
    cstep += 1
8084
    _WaitForSync(self.lu, self.instance)
8085

    
8086
    # Check all devices manually
8087
    self._CheckDevices(self.instance.primary_node, iv_names)
8088

    
8089
    # Step: remove old storage
8090
    if not self.early_release:
8091
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8092
      cstep += 1
8093
      self._RemoveOldStorage(self.target_node, iv_names)
8094

    
8095
  def _ExecDrbd8Secondary(self, feedback_fn):
8096
    """Replace the secondary node for DRBD 8.
8097

8098
    The algorithm for replace is quite complicated:
8099
      - for all disks of the instance:
8100
        - create new LVs on the new node with same names
8101
        - shutdown the drbd device on the old secondary
8102
        - disconnect the drbd network on the primary
8103
        - create the drbd device on the new secondary
8104
        - network attach the drbd on the primary, using an artifice:
8105
          the drbd code for Attach() will connect to the network if it
8106
          finds a device which is connected to the good local disks but
8107
          not network enabled
8108
      - wait for sync across all devices
8109
      - remove all disks from the old secondary
8110

8111
    Failures are not very well handled.
8112

8113
    """
8114
    steps_total = 6
8115

    
8116
    # Step: check device activation
8117
    self.lu.LogStep(1, steps_total, "Check device existence")
8118
    self._CheckDisksExistence([self.instance.primary_node])
8119
    self._CheckVolumeGroup([self.instance.primary_node])
8120

    
8121
    # Step: check other node consistency
8122
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8123
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8124

    
8125
    # Step: create new storage
8126
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8127
    for idx, dev in enumerate(self.instance.disks):
8128
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8129
                      (self.new_node, idx))
8130
      # we pass force_create=True to force LVM creation
8131
      for new_lv in dev.children:
8132
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8133
                        _GetInstanceInfoText(self.instance), False)
8134

    
8135
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    
8144
    iv_names = {}
8145
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8146
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8147
                      (self.new_node, idx))
8148
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
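      # A DRBD8 logical_id is the 6-tuple
      #   (node_a, node_b, port, minor_a, minor_b, secret)
      # as unpacked below; new_alone_id drops the port so the device comes up
      # unconnected, while new_net_id carries the original port for the later
      # network attach.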
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8153
      if self.instance.primary_node == o_node1:
8154
        p_minor = o_minor1
8155
      else:
8156
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8157
        p_minor = o_minor2
8158

    
8159
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8160
                      p_minor, new_minor, o_secret)
8161
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8162
                    p_minor, new_minor, o_secret)
8163

    
8164
      iv_names[idx] = (dev, dev.children, new_net_id)
8165
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8166
                    new_net_id)
8167
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8168
                              logical_id=new_alone_id,
8169
                              children=dev.children,
8170
                              size=dev.size)
8171
      try:
8172
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8173
                              _GetInstanceInfoText(self.instance), False)
8174
      except errors.GenericError:
8175
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8176
        raise
8177

    
8178
    # We have new devices; shut down the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    
8189
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8190
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8191
                                               self.node_secondary_ip,
8192
                                               self.instance.disks)\
8193
                                              [self.instance.primary_node]
8194

    
8195
    msg = result.fail_msg
8196
    if msg:
8197
      # detaches didn't succeed (unlikely)
8198
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8199
      raise errors.OpExecError("Can't detach the disks from the network on"
8200
                               " old node: %s" % (msg,))
8201

    
8202
    # if we managed to detach at least one, we update all the disks of
8203
    # the instance to point to the new secondary
8204
    self.lu.LogInfo("Updating instance configuration")
8205
    for dev, _, new_logical_id in iv_names.itervalues():
8206
      dev.logical_id = new_logical_id
8207
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8208

    
8209
    self.cfg.Update(self.instance, feedback_fn)
8210

    
8211
    # and now perform the drbd attach
8212
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8213
                    " (standalone => connected)")
8214
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8215
                                            self.new_node],
8216
                                           self.node_secondary_ip,
8217
                                           self.instance.disks,
8218
                                           self.instance.name,
8219
                                           False)
8220
    for to_node, to_result in result.items():
8221
      msg = to_result.fail_msg
8222
      if msg:
8223
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8224
                           to_node, msg,
8225
                           hint=("please do a gnt-instance info to see the"
8226
                                 " status of disks"))
8227
    cstep = 5
8228
    if self.early_release:
8229
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8230
      cstep += 1
8231
      self._RemoveOldStorage(self.target_node, iv_names)
8232
      # WARNING: we release all node locks here, do not do other RPCs
8233
      # than WaitForSync to the primary node
8234
      self._ReleaseNodeLock([self.instance.primary_node,
8235
                             self.target_node,
8236
                             self.new_node])
8237

    
8238
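    # Note: with early_release the "Removing old storage" step above has
    # already run and the node locks are gone before the sync wait; otherwise
    # the same step is executed further down, after the devices are checked.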
    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("ignore_consistency", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", ht.NoDefault, ht.TInt),
    ("amount", ht.NoDefault, ht.TInt),
    ("wait_for_sync", True, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_PARAMS = [
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("static", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

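  # The per-disk dict built above is recursive ("children" holds the same
  # structure for child devices), and "pstatus"/"sstatus" are either None (for
  # static queries or offline/absent nodes) or the 7-tuple returned by
  # _ComputeBlockdevStatus above.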
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("nics", ht.EmptyList, ht.TList),
    ("disks", ht.EmptyList, ht.TList),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("disk_template", None, ht.TMaybeString),
    ("remote_node", None, ht.TMaybeString),
    ("os_name", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

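    # Similarly, self.op.nics is a list of (operation, parameters) pairs; an
    # illustrative add would be [(constants.DDM_ADD, {"ip": None,
    # "mac": constants.VALUE_AUTO})], while [(0, {"link": "br0"})] would
    # modify NIC 0 (the values shown here are examples only).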
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

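      # Illustrative example of the check above (numbers are examples only):
      # requesting 2048 MB while the instance currently uses 512 MB and the
      # primary node reports 1024 MB free gives
      # miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is rejected.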
      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)


  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
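  # The conversion table maps (current template, requested template) pairs to
  # the helpers above, so only plain <-> drbd8 conversions are supported.  The
  # stored values are plain functions rather than bound methods, which is why
  # Exec invokes them as self._DISK_CONVERSIONS[mode](self, feedback_fn).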


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

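    # Only remote exports need preparation data (the handshake plus the
    # HMAC-signed key name and signed CA above, all derived from the cluster
    # domain secret); for local exports there is nothing to prepare.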
    return None
9390

    
9391

    
9392
class LUExportInstance(LogicalUnit):
9393
  """Export an instance to an image in the cluster.
9394

9395
  """
9396
  HPATH = "instance-export"
9397
  HTYPE = constants.HTYPE_INSTANCE
9398
  _OP_PARAMS = [
9399
    _PInstanceName,
9400
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9401
    ("shutdown", True, ht.TBool),
9402
    _PShutdownTimeout,
9403
    ("remove_instance", False, ht.TBool),
9404
    ("ignore_remove_failures", False, ht.TBool),
9405
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9406
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9407
    ("destination_x509_ca", None, ht.TMaybeString),
9408
    ]
9409
  REQ_BGL = False
9410

    
9411
  def CheckArguments(self):
9412
    """Check the arguments.
9413

9414
    """
9415
    self.x509_key_name = self.op.x509_key_name
9416
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9417

    
9418
    if self.op.remove_instance and not self.op.shutdown:
9419
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9420
                                 " down before")
9421

    
9422
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9423
      if not self.x509_key_name:
9424
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9425
                                   errors.ECODE_INVAL)
9426

    
9427
      if not self.dest_x509_ca_pem:
9428
        raise errors.OpPrereqError("Missing destination X509 CA",
9429
                                   errors.ECODE_INVAL)
9430

    
9431
  def ExpandNames(self):
9432
    self._ExpandAndLockInstance()
9433

    
9434
    # Lock all nodes for local exports
9435
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9436
      # FIXME: lock only instance primary and destination node
9437
      #
9438
      # Sad but true, for now we have do lock all nodes, as we don't know where
9439
      # the previous export might be, and in this LU we search for it and
9440
      # remove it from its current node. In the future we could fix this by:
9441
      #  - making a tasklet to search (share-lock all), then create the
9442
      #    new one, then one to remove, after
9443
      #  - removing the removal operation altogether
9444
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9445

    
9446
  def DeclareLocks(self, level):
9447
    """Last minute lock declaration."""
9448
    # All nodes are locked anyway, so nothing to do here.
9449

    
9450
  def BuildHooksEnv(self):
9451
    """Build hooks env.
9452

9453
    This will run on the master, primary node and target node.
9454

9455
    """
9456
    env = {
9457
      "EXPORT_MODE": self.op.mode,
9458
      "EXPORT_NODE": self.op.target_node,
9459
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9460
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9461
      # TODO: Generic function for boolean env variables
9462
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9463
      }
9464

    
9465
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9466

    
9467
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9468

    
9469
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9470
      nl.append(self.op.target_node)
9471

    
9472
    return env, nl, nl
9473

    
9474
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point the export was successful, so we can clean up and finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


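# Illustrative sketch, not used by the code above: one way a caller could
# summarise the (fin_resu, dresults) tuple returned by LUExportInstance.Exec.
# The helper name is hypothetical and only serves as documentation of the
# return value's shape.
def _ExampleSummarizeExportResult(fin_resu, dresults):
  """Summarise an export result tuple (sketch only).

  @type fin_resu: bool
  @param fin_resu: whether export finalization succeeded
  @type dresults: list of booleans
  @param dresults: per-disk export success flags

  """
  if fin_resu and compat.all(dresults):
    return "export successful for all %d disk(s)" % len(dresults)
  failed = utils.CommaJoin(str(idx) for (idx, dsk) in enumerate(dresults)
                           if not dsk)
  return ("export incomplete (finalized=%s, failed disks: %s)" %
          (fin_resu, failed or "none"))

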
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


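# Illustrative sketch, not part of the original module: how the signed X509
# key name tuple checked by LUExportInstance.CheckPrereq above is built and
# verified.  It assumes utils.Sha1Hmac is the signing counterpart of
# utils.VerifySha1Hmac used above; the helper name is hypothetical.
def _ExampleSignAndVerifyX509KeyName(cds, key_name, salt):
  """Sign a key name with the cluster domain secret and verify it (sketch).

  @type cds: string
  @param cds: cluster domain secret, as returned by _GetClusterDomainSecret
  @type key_name: string
  @param key_name: X509 key name to protect
  @type salt: string
  @param salt: HMAC salt chosen by the caller

  """
  # (key_name, hmac_digest, hmac_salt) is the tuple format expected in
  # self.x509_key_name by the remote-export prerequisite check above;
  # utils.Sha1Hmac is assumed to produce the digest verified there
  signed = (key_name, utils.Sha1Hmac(cds, key_name, salt=salt), salt)
  (name, digest, used_salt) = signed
  if not utils.VerifySha1Hmac(cds, name, digest, salt=used_salt):
    raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                               errors.ECODE_INVAL)
  return signed

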
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


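# Illustrative sketch, not part of the original module: the (path, tag)
# result format produced by LUSearchTags.Exec above, demonstrated on plain
# data structures.  The helper name and the sample call in the docstring are
# hypothetical.
def _ExampleSearchTags(pattern, tags_by_path):
  """Filter a {path: [tags]} mapping with a regular expression (sketch only).

  Example: _ExampleSearchTags("^web", {"/instances/inst1": ["webserver"]})
  would return [("/instances/inst1", "webserver")].

  """
  regex = re.compile(pattern)
  results = []
  for path, tags in tags_by_path.items():
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results

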
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


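# Illustrative sketch, not part of the original module: the set arithmetic
# used by LUDelTags.CheckPrereq above, shown standalone.  The helper name is
# hypothetical.
def _ExampleMissingTags(requested_tags, current_tags):
  """Return an error message for requested tags that are not set (sketch).

  """
  diff_tags = frozenset(requested_tags) - frozenset(current_tags)
  if not diff_tags:
    return None
  diff_names = ("'%s'" % i for i in sorted(diff_tags))
  return "Tag(s) %s not found" % utils.CommaJoin(diff_names)

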
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", ht.NoDefault, ht.TFloat),
    ("on_master", True, ht.TBool),
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("repeat", 0, ht.TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, ht.TBool),
    ("notify_exec", False, ht.TBool),
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
    ("fail", False, ht.TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


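# Illustrative sketch, not part of the original module: the client-side
# counterpart of LUTestJobqueue._NotifyUsingSocket above.  A test client that
# received the socket path (e.g. via the ELOG_JQUEUE_TEST message) would
# connect and write a single byte to confirm the notification.  The function
# name and the default timeout are hypothetical.
def _ExampleConfirmJobqueueNotification(sockname, timeout=10.0):
  """Connect to the notification socket and acknowledge it (sketch only).

  @type sockname: string
  @param sockname: path of the Unix socket created by the LU
  @type timeout: float
  @param timeout: connect/send timeout in seconds

  """
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.settimeout(timeout)
    # connecting unblocks the LU's sock.accept() call
    client.connect(sockname)
    # any single byte satisfies the LU's conn.recv(1) confirmation step
    client.send("\0")
  finally:
    client.close()

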
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = { "name": gdata.name }
    return ng

  @staticmethod
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    """
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


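# Illustrative sketch, not used by the code above: how a logical unit
# typically drives the IAllocator class defined above in allocation mode.
# The allocator name, resource figures and NIC values are placeholders, and
# the function itself is hypothetical documentation rather than module code.
def _ExampleRunAllocator(lu, instance_name):
  """Ask an iallocator script for nodes for a new instance (sketch only).

  """
  ial = IAllocator(lu.cfg, lu.rpc,
                   mode=constants.IALLOCATOR_MODE_ALLOC,
                   name=instance_name,
                   mem_size=512,
                   disks=[{"size": 1024, "mode": "w"}],
                   disk_template=constants.DT_DRBD8,
                   os="debootstrap",
                   tags=[],
                   nics=[{"mac": None, "ip": None, "bridge": None}],
                   vcpus=1,
                   hypervisor=lu.cfg.GetHypervisorType(),
                   )
  ial.Run("hail")
  if not ial.success:
    raise errors.OpExecError("Allocator failed: %s" % ial.info)
  # on success, ial.result holds the chosen node names
  return ial.result

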
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
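

# Illustrative sketch, not part of the original module: the minimal response
# document that IAllocator._ValidateResult above accepts, serialized and
# re-parsed with the same serializer module the class uses.  The node names
# are placeholder values and the function is hypothetical documentation.
def _ExampleAllocatorResponse():
  """Build and parse a minimal iallocator response (sketch only).

  """
  response = {
    "success": True,
    "info": "allocation successful",
    # for an allocation request this is the list of chosen node names
    "result": ["node1.example.com", "node2.example.com"],
    }
  text = serializer.Dump(response)
  rdict = serializer.Load(text)
  assert compat.all(key in rdict for key in ("success", "info", "result"))
  return rdict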