#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611

# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)
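
# A rough, hypothetical sketch (not an LU defined in this module) of how the
# parameter tuples above are meant to be used: an LU lists them, together with
# its own (name, default, type) tuples, in its _OP_PARAMS attribute, e.g.:
#
#   class LUHypotheticalShutdown(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,
#       _PForce,
#       _PShutdownTimeout,
#       ]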


# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the 'could
    # be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
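
# A minimal, purely illustrative tasklet (not part of this module) following
# the rules above could look like:
#
#   class _HypotheticalNoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")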


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
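
# For illustration (made-up values), with use_default=True:
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "disk": 1})
# returns {"vcpus": 2, "disk": 1}: "mem" is reset to its default (i.e. removed
# from the result) and "disk" is added.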


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
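
# For illustration (hypothetical instance): a single-NIC, single-disk instance
# gets, among others, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_IP,
# INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK,
# INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE and INSTANCE_DISK0_MODE, plus one
# INSTANCE_BE_* and INSTANCE_HV_* entry per backend/hypervisor parameter.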


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
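
# For illustration (made-up numbers): if GetMasterCandidateStats() reports
# mc_now=3 and mc_should=4 while candidate_pool_size is 10, then
# min(4 + 1, 10) = 5 and 3 < 5, so the node decides to promote itself.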


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
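  """Returns the configured instances for which fn(instance) is true."""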
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
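  """Returns the indices of an instance's disks reported faulty by a node."""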
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
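
    # For illustration (hypothetical values): with error_codes enabled, the
    # line reported through feedback_fn looks like
    #   - ERROR:ENODERPC:node:node1.example.com:unable to verify node: ...
    # and without it like
    #   - ERROR: node node1.example.com: unable to verify node: ...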

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
1463
                      diskstatus):
1464
    """Verify an instance.
1465

1466
    This function checks to see if the required block devices are
1467
    available on the instance's node.
1468

1469
    """
1470
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1471
    node_current = instanceconfig.primary_node
1472

    
1473
    node_vol_should = {}
1474
    instanceconfig.MapLVsByNode(node_vol_should)
1475

    
1476
    for node in node_vol_should:
1477
      n_img = node_image[node]
1478
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1479
        # ignore missing volumes on offline or broken nodes
1480
        continue
1481
      for volume in node_vol_should[node]:
1482
        test = volume not in n_img.volumes
1483
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1484
                 "volume %s missing on node %s", volume, node)
1485

    
1486
    if instanceconfig.admin_up:
1487
      pri_img = node_image[node_current]
1488
      test = instance not in pri_img.instances and not pri_img.offline
1489
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1490
               "instance not running on its primary node %s",
1491
               node_current)
1492

    
1493
    for node, n_img in node_image.items():
1494
      if (not node == node_current):
1495
        test = instance in n_img.instances
1496
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1497
                 "instance should not run on node %s", node)
1498

    
1499
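    # flatten the per-node disk status (node name -> list of per-disk
    # results) into (node, status, index) triples so the checks below can
    # iterate over a single flat list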
    diskdata = [(nname, disk, idx)
                for (nname, disks) in diskstatus.items()
                for idx, disk in enumerate(disks)]

    for nname, bdev_status, idx in diskdata:
      _ErrorIf(not bdev_status,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s", idx, nname)
      _ErrorIf(bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY,
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
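      # n_img.sbp maps a primary node name to the instances that use this
      # node as secondary; these are the instances we would have to host
      # here if that primary node failed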
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
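      # files listed in master_files are only required on master
      # candidates; their presence on other nodes is flagged below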
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
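    # node_drbd maps each minor allocated on this node to a tuple of
    # (instance name, whether the minor is expected to be in use)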
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

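    # os_dict: OS name -> list of (path, status, diagnose message,
    # variants, parameters, API versions) tuples; _VerifyNodeOS below
    # consumes this layout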
    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
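    # the node returns either an error string (the LVM query failed) or a
    # dict mapping LV names to their attributes; anything else means the
    # RPC itself misbehaved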
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}

    for nname in nodelist:
      disks = [(inst, disk)
               for instlist in [node_image[nname].pinst,
                                node_image[nname].sinst]
               for inst in instlist
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

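    # instdisk: instance name -> node name -> list of per-disk status
    # results (a list of None placeholders if the node's RPC failed)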
    instdisk = {}

    for (nname, nres) in result.items():
      if nres.offline:
        # Ignore offline node
        continue

      disks = node_disks[nname]

      msg = nres.fail_msg
      _ErrorIf(msg, self.ENODERPC, nname,
               "while getting disk information: %s", nres.fail_msg)
      if msg:
        # No data from this node
        data = len(disks) * [None]
      else:
        data = nres.payload

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
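    # parameters for the node_verify RPC; each NV_* key asks the remote
    # node to run one class of checks and report the results back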
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
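    # node_image: node name -> NodeImage; the pinst/sinst/sbp fields are
    # filled in from the instance list below, the runtime fields later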
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                           all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
2121
    for instance in instancelist:
2122
      if verbose:
2123
        feedback_fn("* Verifying instance %s" % instance)
2124
      inst_config = instanceinfo[instance]
2125
      self._VerifyInstance(instance, inst_config, node_image,
2126
                           instdisk[instance])
2127
      inst_nodes_offline = []
2128

    
2129
      pnode = inst_config.primary_node
2130
      pnode_img = node_image[pnode]
2131
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2132
               self.ENODERPC, pnode, "instance %s, connection to"
2133
               " primary node failed", instance)
2134

    
2135
      if pnode_img.offline:
2136
        inst_nodes_offline.append(pnode)
2137

    
2138
      # If the instance is non-redundant we cannot survive losing its primary
2139
      # node, so we are not N+1 compliant. On the other hand we have no disk
2140
      # templates with more than one secondary so that situation is not well
2141
      # supported either.
2142
      # FIXME: does not support file-backed instances
2143
      if not inst_config.secondary_nodes:
2144
        i_non_redundant.append(instance)
2145
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2146
               instance, "instance has multiple secondary nodes: %s",
2147
               utils.CommaJoin(inst_config.secondary_nodes),
2148
               code=self.ETYPE_WARNING)
2149

    
2150
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2151
        i_non_a_balanced.append(instance)
2152

    
2153
      for snode in inst_config.secondary_nodes:
2154
        s_img = node_image[snode]
2155
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2156
                 "instance %s, connection to secondary node failed", instance)
2157

    
2158
        if s_img.offline:
2159
          inst_nodes_offline.append(snode)
2160

    
2161
      # warn that the instance lives on offline nodes
2162
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2163
               "instance lives on offline node(s) %s",
2164
               utils.CommaJoin(inst_nodes_offline))
2165
      # ... or ghost nodes
2166
      for node in inst_config.all_nodes:
2167
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2168
                 "instance lives on ghost node %s", node)
2169

    
2170
    feedback_fn("* Verifying orphan volumes")
2171
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2172
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2173

    
2174
    feedback_fn("* Verifying orphan instances")
2175
    self._VerifyOrphanInstances(instancelist, node_image)
2176

    
2177
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2178
      feedback_fn("* Verifying N+1 Memory redundancy")
2179
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2180

    
2181
    feedback_fn("* Other Notes")
2182
    if i_non_redundant:
2183
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2184
                  % len(i_non_redundant))
2185

    
2186
    if i_non_a_balanced:
2187
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2188
                  % len(i_non_a_balanced))
2189

    
2190
    if n_offline:
2191
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2192

    
2193
    if n_drained:
2194
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2195

    
2196
    return not self.bad
2197

    
2198
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2199
    """Analyze the post-hooks' result
2200

2201
    This method analyses the hook result, handles it, and sends some
2202
    nicely-formatted feedback back to the user.
2203

2204
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2205
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2206
    @param hooks_results: the results of the multi-node hooks rpc call
2207
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

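    # ask all nodes in one RPC for the logical volumes they have in the
    # cluster's volume group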
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

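    # query each primary node for the actual block device sizes and
    # reconcile any mismatch with the recorded configuration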
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("vg_name", None, ht.TMaybeString),
    ("enabled_hypervisors", None,
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
            ht.TNone)),
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("beparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                            ht.TNone)),
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
                              ht.TNone)),
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
    ("uid_pool", None, ht.NoType),
    ("add_uids", None, ht.NoType),
    ("remove_uids", None, ht.NoType),
    ("maintain_node_health", None, ht.TMaybeBool),
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
    ("hidden_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
          ht.TAnd(ht.TList,
                ht.TIsLength(2),
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
          ht.TNone)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2680
                              (instance.name, nic_idx))
2681
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
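    # build the new hypervisor parameters starting from a copy of the
    # current cluster-level values (objects.FillDict merges the second
    # dict over a copy of the first)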
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
2776
    """Change the parameters of the cluster.
2777

2778
    """
2779
    if self.op.vg_name is not None:
2780
      new_volume = self.op.vg_name
2781
      if not new_volume:
2782
        new_volume = None
2783
      if new_volume != self.cfg.GetVGName():
2784
        self.cfg.SetVGName(new_volume)
2785
      else:
2786
        feedback_fn("Cluster LVM configuration already in desired"
2787
                    " state, not changing")
2788
    if self.op.drbd_helper is not None:
2789
      new_helper = self.op.drbd_helper
2790
      if not new_helper:
2791
        new_helper = None
2792
      if new_helper != self.cfg.GetDRBDHelper():
2793
        self.cfg.SetDRBDHelper(new_helper)
2794
      else:
2795
        feedback_fn("Cluster DRBD helper already in desired state,"
2796
                    " not changing")
2797
    if self.op.hvparams:
2798
      self.cluster.hvparams = self.new_hvparams
2799
    if self.op.os_hvp:
2800
      self.cluster.os_hvp = self.new_os_hvp
2801
    if self.op.enabled_hypervisors is not None:
2802
      self.cluster.hvparams = self.new_hvparams
2803
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2804
    if self.op.beparams:
2805
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2806
    if self.op.nicparams:
2807
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2808
    if self.op.osparams:
2809
      self.cluster.osparams = self.new_osp
2810

    
2811
    if self.op.candidate_pool_size is not None:
2812
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2813
      # we need to update the pool size here, otherwise the save will fail
2814
      _AdjustCandidatePool(self, [])
2815

    
2816
    if self.op.maintain_node_health is not None:
2817
      self.cluster.maintain_node_health = self.op.maintain_node_health
2818

    
2819
    if self.op.prealloc_wipe_disks is not None:
2820
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2821

    
2822
    if self.op.add_uids is not None:
2823
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2824

    
2825
    if self.op.remove_uids is not None:
2826
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2827

    
2828
    if self.op.uid_pool is not None:
2829
      self.cluster.uid_pool = self.op.uid_pool
2830

    
2831
    if self.op.default_iallocator is not None:
2832
      self.cluster.default_iallocator = self.op.default_iallocator
2833

    
2834
    if self.op.reserved_lvs is not None:
2835
      self.cluster.reserved_lvs = self.op.reserved_lvs
2836

    
2837
    def helper_os(aname, mods, desc):
2838
      desc += " OS list"
2839
      lst = getattr(self.cluster, aname)
2840
      for key, val in mods:
2841
        if key == constants.DDM_ADD:
2842
          if val in lst:
2843
            feedback_fn("OS %s already in %s, ignoring", val, desc)
2844
          else:
2845
            lst.append(val)
2846
        elif key == constants.DDM_REMOVE:
2847
          if val in lst:
2848
            lst.remove(val)
2849
          else:
2850
            feedback_fn("OS %s not found in %s, ignoring", val, desc)
2851
        else:
2852
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
2853

    
2854
    if self.op.hidden_os:
2855
      helper_os("hidden_os", self.op.hidden_os, "hidden")
2856

    
2857
    if self.op.blacklisted_os:
2858
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2859

    
2860
    self.cfg.Update(self.cluster, feedback_fn)
2861

    
2862

    
2863
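# Illustrative sketch only, not part of the original module: the hidden_os /
# blacklisted_os handling in LUSetClusterParams.Exec above applies a list of
# (action, value) modifications to an OS list.  The standalone helper below
# mirrors that add/remove pattern with plain strings standing in for
# constants.DDM_ADD / constants.DDM_REMOVE; the name is hypothetical.
def _example_apply_os_list_mods(lst, mods):
  """Apply ("add"/"remove", value) pairs to a list, skipping no-ops."""
  for action, val in mods:
    if action == "add":
      if val not in lst:
        lst.append(val)
    elif action == "remove":
      if val in lst:
        lst.remove(val)
    else:
      raise ValueError("Unknown action %r" % action)
  return lst

# For instance, _example_apply_os_list_mods(["lenny"], [("add", "squeeze"),
# ("remove", "lenny")]) returns ["squeeze"].
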
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded

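# Illustrative sketch only, not part of the original module: the "degraded
# retries" policy used by _WaitForSync above, reduced to a standalone helper.
# poll_fn is a hypothetical callback returning (done, degraded); when the
# disks look done but still degraded we re-check a bounded number of times,
# so a transient degraded state does not turn into a false negative.
def _example_wait_not_degraded(poll_fn, degr_retries=10, sleep_fn=time.sleep):
  while True:
    done, degraded = poll_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(1)
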
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output

class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained",
                    "master_capable", "vm_capable"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output

class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output

class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the given storage unit.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString)
    ]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   master_capable=True,
                                   vm_capable=True,
                                   offline=False, drained=False,
                                   group=node_group)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())

class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role

    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1

    # compute new flags
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    result = []
    changed_mc = [old_role, new_role].count(self._ROLE_CANDIDATE) == 1

    if self.op.master_capable is not None:
      node.master_capable = self.op.master_capable
      result.append(("master_capable", str(self.op.master_capable)))

    # Tell the node to demote itself, if no longer MC and not offline
    if (old_role == self._ROLE_CANDIDATE and
        new_role != self._ROLE_OFFLINE and new_role != old_role):
      msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
      if msg:
        self.LogWarning("Node failed to demote itself: %s", msg)

    new_flags = self._R2F[new_role]
    for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
      if of != nf:
        result.append((desc, str(nf)))
    (node.master_candidate, node.drained, node.offline) = new_flags

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result

class LUPowercycleNode(NoHooksLU):
4080
  """Powercycles a node.
4081

4082
  """
4083
  _OP_PARAMS = [
4084
    _PNodeName,
4085
    _PForce,
4086
    ]
4087
  REQ_BGL = False
4088

    
4089
  def CheckArguments(self):
4090
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4091
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4092
      raise errors.OpPrereqError("The node is the master and the force"
4093
                                 " parameter was not set",
4094
                                 errors.ECODE_INVAL)
4095

    
4096
  def ExpandNames(self):
4097
    """Locking for PowercycleNode.
4098

4099
    This is a last-resort option and shouldn't block on other
4100
    jobs. Therefore, we grab no locks.
4101

4102
    """
4103
    self.needed_locks = {}
4104

    
4105
  def Exec(self, feedback_fn):
4106
    """Reboots a node.
4107

4108
    """
4109
    result = self.rpc.call_node_powercycle(self.op.node_name,
4110
                                           self.cfg.GetHypervisorType())
4111
    result.Raise("Failed to schedule the reboot")
4112
    return result.payload
4113

    
4114

    
4115
class LUQueryClusterInfo(NoHooksLU):
4116
  """Query cluster configuration.
4117

4118
  """
4119
  REQ_BGL = False
4120

    
4121
  def ExpandNames(self):
4122
    self.needed_locks = {}
4123

    
4124
  def Exec(self, feedback_fn):
4125
    """Return cluster config.
4126

4127
    """
4128
    cluster = self.cfg.GetClusterInfo()
4129
    os_hvp = {}
4130

    
4131
    # Filter just for enabled hypervisors
4132
    for os_name, hv_dict in cluster.os_hvp.items():
4133
      os_hvp[os_name] = {}
4134
      for hv_name, hv_params in hv_dict.items():
4135
        if hv_name in cluster.enabled_hypervisors:
4136
          os_hvp[os_name][hv_name] = hv_params
4137

    
4138
    # Convert ip_family to ip_version
4139
    primary_ip_version = constants.IP4_VERSION
4140
    if cluster.primary_ip_family == netutils.IP6Address.family:
4141
      primary_ip_version = constants.IP6_VERSION
4142

    
4143
    result = {
4144
      "software_version": constants.RELEASE_VERSION,
4145
      "protocol_version": constants.PROTOCOL_VERSION,
4146
      "config_version": constants.CONFIG_VERSION,
4147
      "os_api_version": max(constants.OS_API_VERSIONS),
4148
      "export_version": constants.EXPORT_VERSION,
4149
      "architecture": (platform.architecture()[0], platform.machine()),
4150
      "name": cluster.cluster_name,
4151
      "master": cluster.master_node,
4152
      "default_hypervisor": cluster.enabled_hypervisors[0],
4153
      "enabled_hypervisors": cluster.enabled_hypervisors,
4154
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4155
                        for hypervisor_name in cluster.enabled_hypervisors]),
4156
      "os_hvp": os_hvp,
4157
      "beparams": cluster.beparams,
4158
      "osparams": cluster.osparams,
4159
      "nicparams": cluster.nicparams,
4160
      "candidate_pool_size": cluster.candidate_pool_size,
4161
      "master_netdev": cluster.master_netdev,
4162
      "volume_group_name": cluster.volume_group_name,
4163
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4164
      "file_storage_dir": cluster.file_storage_dir,
4165
      "maintain_node_health": cluster.maintain_node_health,
4166
      "ctime": cluster.ctime,
4167
      "mtime": cluster.mtime,
4168
      "uuid": cluster.uuid,
4169
      "tags": list(cluster.GetTags()),
4170
      "uid_pool": cluster.uid_pool,
4171
      "default_iallocator": cluster.default_iallocator,
4172
      "reserved_lvs": cluster.reserved_lvs,
4173
      "primary_ip_version": primary_ip_version,
4174
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4175
      }
4176

    
4177
    return result
4178

    
4179

    
4180
class LUQueryConfigValues(NoHooksLU):
4181
  """Return configuration values.
4182

4183
  """
4184
  _OP_PARAMS = [_POutputFields]
4185
  REQ_BGL = False
4186
  _FIELDS_DYNAMIC = utils.FieldSet()
4187
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4188
                                  "watcher_pause", "volume_group_name")
4189

    
4190
  def CheckArguments(self):
4191
    _CheckOutputFields(static=self._FIELDS_STATIC,
4192
                       dynamic=self._FIELDS_DYNAMIC,
4193
                       selected=self.op.output_fields)
4194

    
4195
  def ExpandNames(self):
4196
    self.needed_locks = {}
4197

    
4198
  def Exec(self, feedback_fn):
4199
    """Dump a representation of the cluster config to the standard output.
4200

4201
    """
4202
    values = []
4203
    for field in self.op.output_fields:
4204
      if field == "cluster_name":
4205
        entry = self.cfg.GetClusterName()
4206
      elif field == "master_node":
4207
        entry = self.cfg.GetMasterNode()
4208
      elif field == "drain_flag":
4209
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4210
      elif field == "watcher_pause":
4211
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4212
      elif field == "volume_group_name":
4213
        entry = self.cfg.GetVGName()
4214
      else:
4215
        raise errors.ParameterError(field)
4216
      values.append(entry)
4217
    return values
4218

    
4219

    
4220
class LUActivateInstanceDisks(NoHooksLU):
4221
  """Bring up an instance's disks.
4222

4223
  """
4224
  _OP_PARAMS = [
4225
    _PInstanceName,
4226
    ("ignore_size", False, ht.TBool),
4227
    ]
4228
  REQ_BGL = False
4229

    
4230
  def ExpandNames(self):
4231
    self._ExpandAndLockInstance()
4232
    self.needed_locks[locking.LEVEL_NODE] = []
4233
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4234

    
4235
  def DeclareLocks(self, level):
4236
    if level == locking.LEVEL_NODE:
4237
      self._LockInstancesNodes()
4238

    
4239
  def CheckPrereq(self):
4240
    """Check prerequisites.
4241

4242
    This checks that the instance is in the cluster.
4243

4244
    """
4245
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4246
    assert self.instance is not None, \
4247
      "Cannot retrieve locked instance %s" % self.op.instance_name
4248
    _CheckNodeOnline(self, self.instance.primary_node)
4249

    
4250
  def Exec(self, feedback_fn):
4251
    """Activate the disks.
4252

4253
    """
4254
    disks_ok, disks_info = \
4255
              _AssembleInstanceDisks(self, self.instance,
4256
                                     ignore_size=self.op.ignore_size)
4257
    if not disks_ok:
4258
      raise errors.OpExecError("Cannot activate block devices")
4259

    
4260
    return disks_info
4261

    
4262

    
4263
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4264
                           ignore_size=False):
4265
  """Prepare the block devices for an instance.
4266

4267
  This sets up the block devices on all nodes.
4268

4269
  @type lu: L{LogicalUnit}
4270
  @param lu: the logical unit on whose behalf we execute
4271
  @type instance: L{objects.Instance}
4272
  @param instance: the instance for whose disks we assemble
4273
  @type disks: list of L{objects.Disk} or None
4274
  @param disks: which disks to assemble (or all, if None)
4275
  @type ignore_secondaries: boolean
4276
  @param ignore_secondaries: if true, errors on secondary nodes
4277
      won't result in an error return from the function
4278
  @type ignore_size: boolean
4279
  @param ignore_size: if true, the current known size of the disk
4280
      will not be used during the disk activation, useful for cases
4281
      when the size is wrong
4282
  @return: False if the operation failed, otherwise a list of
4283
      (host, instance_visible_name, node_visible_name)
4284
      with the mapping from node devices to instance devices
4285

4286
  """
4287
  device_info = []
4288
  disks_ok = True
4289
  iname = instance.name
4290
  disks = _ExpandCheckDisks(instance, disks)
4291

    
4292
  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
4300

    
4301
  # 1st pass, assemble on all nodes in secondary mode
4302
  for inst_disk in disks:
4303
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4304
      if ignore_size:
4305
        node_disk = node_disk.Copy()
4306
        node_disk.UnsetSize()
4307
      lu.cfg.SetDiskID(node_disk, node)
4308
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4309
      msg = result.fail_msg
4310
      if msg:
4311
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4312
                           " (is_primary=False, pass=1): %s",
4313
                           inst_disk.iv_name, node, msg)
4314
        if not ignore_secondaries:
4315
          disks_ok = False
4316

    
4317
  # FIXME: race condition on drbd migration to primary
4318

    
4319
  # 2nd pass, do only the primary node
4320
  for inst_disk in disks:
4321
    dev_path = None
4322

    
4323
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4324
      if node != instance.primary_node:
4325
        continue
4326
      if ignore_size:
4327
        node_disk = node_disk.Copy()
4328
        node_disk.UnsetSize()
4329
      lu.cfg.SetDiskID(node_disk, node)
4330
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4331
      msg = result.fail_msg
4332
      if msg:
4333
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4334
                           " (is_primary=True, pass=2): %s",
4335
                           inst_disk.iv_name, node, msg)
4336
        disks_ok = False
4337
      else:
4338
        dev_path = result.payload
4339

    
4340
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4341

    
4342
  # leave the disks configured for the primary node
4343
  # this is a workaround that would be fixed better by
4344
  # improving the logical/physical id handling
4345
  for disk in disks:
4346
    lu.cfg.SetDiskID(disk, instance.primary_node)
4347

    
4348
  return disks_ok, device_info
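

# A minimal usage sketch for _AssembleInstanceDisks, mirroring what
# LUActivateInstanceDisks.Exec (above) and _StartInstanceDisks (below)
# do: check the disks_ok flag and walk the (node, iv_name, dev_path)
# tuples.  The helper _ExampleActivateAndReport is hypothetical and is
# not called by any LU.
def _ExampleActivateAndReport(lu, instance):
  """Assemble an instance's disks and log the resulting device paths.

  This is an illustrative sketch, not part of the activation flow.

  """
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
                                                 ignore_secondaries=False)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    logging.info("Disk %s of instance %s assembled on node %s as %s",
                 iv_name, instance.name, node, dev_path)
  return device_info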
4349

    
4350

    
4351
def _StartInstanceDisks(lu, instance, force):
4352
  """Start the disks of an instance.
4353

4354
  """
4355
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
4357
  if not disks_ok:
4358
    _ShutdownInstanceDisks(lu, instance)
4359
    if force is not None and not force:
4360
      lu.proc.LogWarning("", hint="If the message above refers to a"
4361
                         " secondary node,"
4362
                         " you can retry the operation using '--force'.")
4363
    raise errors.OpExecError("Disk consistency error")
4364

    
4365

    
4366
class LUDeactivateInstanceDisks(NoHooksLU):
4367
  """Shutdown an instance's disks.
4368

4369
  """
4370
  _OP_PARAMS = [
4371
    _PInstanceName,
4372
    ]
4373
  REQ_BGL = False
4374

    
4375
  def ExpandNames(self):
4376
    self._ExpandAndLockInstance()
4377
    self.needed_locks[locking.LEVEL_NODE] = []
4378
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4379

    
4380
  def DeclareLocks(self, level):
4381
    if level == locking.LEVEL_NODE:
4382
      self._LockInstancesNodes()
4383

    
4384
  def CheckPrereq(self):
4385
    """Check prerequisites.
4386

4387
    This checks that the instance is in the cluster.
4388

4389
    """
4390
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4391
    assert self.instance is not None, \
4392
      "Cannot retrieve locked instance %s" % self.op.instance_name
4393

    
4394
  def Exec(self, feedback_fn):
4395
    """Deactivate the disks
4396

4397
    """
4398
    instance = self.instance
4399
    _SafeShutdownInstanceDisks(self, instance)
4400

    
4401

    
4402
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4411

    
4412

    
4413
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
4429

    
4430

    
4431
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4432
  """Shutdown block devices of an instance.
4433

4434
  This does the shutdown on all nodes of the instance.
4435

4436
  If ignore_primary is false, errors on the primary node are not
  ignored (they cause the function to return a failure).
4438

4439
  """
4440
  all_result = True
4441
  disks = _ExpandCheckDisks(instance, disks)
4442

    
4443
  for disk in disks:
4444
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4445
      lu.cfg.SetDiskID(top_disk, node)
4446
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4447
      msg = result.fail_msg
4448
      if msg:
4449
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4450
                      disk.iv_name, node, msg)
4451
        if not ignore_primary or node != instance.primary_node:
4452
          all_result = False
4453
  return all_result
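

# Sketch of how the ignore_primary flag is meant to be used: with
# ignore_primary=True (as during failover, see LUFailoverInstance.Exec
# below), shutdown errors on the primary node do not make the call
# fail, only errors on the other nodes do.  _ExampleShutdownForFailover
# is a hypothetical helper and is not used by any LU.
def _ExampleShutdownForFailover(lu, instance):
  """Shut down an instance's disks, tolerating primary-node errors.

  This is an illustrative sketch only.

  """
  if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
    raise errors.OpExecError("Can't shut down the instance's disks.")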
4454

    
4455

    
4456
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4457
  """Checks if a node has enough free memory.
4458

4459
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4463

4464
  @type lu: C{LogicalUnit}
4465
  @param lu: a logical unit from which we get configuration data
4466
  @type node: C{str}
4467
  @param node: the node to check
4468
  @type reason: C{str}
4469
  @param reason: string to use in the error message
4470
  @type requested: C{int}
4471
  @param requested: the amount of memory in MiB to check for
4472
  @type hypervisor_name: C{str}
4473
  @param hypervisor_name: the hypervisor to ask for memory stats
4474
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4475
      we cannot check the node
4476

4477
  """
4478
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4479
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4480
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4481
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4482
  if not isinstance(free_mem, int):
4483
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4484
                               " was '%s'" % (node, free_mem),
4485
                               errors.ECODE_ENVIRON)
4486
  if requested > free_mem:
4487
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4488
                               " needed %s MiB, available %s MiB" %
4489
                               (node, reason, requested, free_mem),
4490
                               errors.ECODE_NORES)
4491

    
4492

    
4493
def _CheckNodesFreeDisk(lu, nodenames, requested):
4494
  """Checks if nodes have enough free disk space in the default VG.
4495

4496
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4500

4501
  @type lu: C{LogicalUnit}
4502
  @param lu: a logical unit from which we get configuration data
4503
  @type nodenames: C{list}
4504
  @param nodenames: the list of node names to check
4505
  @type requested: C{int}
4506
  @param requested: the amount of disk in MiB to check for
4507
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4508
      we cannot check the node
4509

4510
  """
4511
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4512
                                   lu.cfg.GetHypervisorType())
4513
  for node in nodenames:
4514
    info = nodeinfo[node]
4515
    info.Raise("Cannot get current information from node %s" % node,
4516
               prereq=True, ecode=errors.ECODE_ENVIRON)
4517
    vg_free = info.payload.get("vg_free", None)
4518
    if not isinstance(vg_free, int):
4519
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4520
                                 " result was '%s'" % (node, vg_free),
4521
                                 errors.ECODE_ENVIRON)
4522
    if requested > vg_free:
4523
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4524
                                 " required %d MiB, available %d MiB" %
4525
                                 (node, requested, vg_free),
4526
                                 errors.ECODE_NORES)
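

# _CheckNodeFreeMemory and _CheckNodesFreeDisk are typically called
# from a LU's CheckPrereq to validate a candidate node before
# committing to an operation.  The sketch below combines them for a
# single node; the helper name and its requested_* parameters are
# illustrative only.
def _ExampleCheckNodeResources(lu, node, requested_mem, requested_disk,
                               hypervisor_name):
  """Check free memory and free disk space on a single node (sketch).

  The helpers raise errors.OpPrereqError themselves if the node lacks
  resources or cannot be queried.

  """
  _CheckNodeFreeMemory(lu, node, "resource check", requested_mem,
                       hypervisor_name)
  _CheckNodesFreeDisk(lu, [node], requested_disk)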
4527

    
4528

    
4529
class LUStartupInstance(LogicalUnit):
4530
  """Starts an instance.
4531

4532
  """
4533
  HPATH = "instance-start"
4534
  HTYPE = constants.HTYPE_INSTANCE
4535
  _OP_PARAMS = [
4536
    _PInstanceName,
4537
    _PForce,
4538
    _PIgnoreOfflineNodes,
4539
    ("hvparams", ht.EmptyDict, ht.TDict),
4540
    ("beparams", ht.EmptyDict, ht.TDict),
4541
    ]
4542
  REQ_BGL = False
4543

    
4544
  def CheckArguments(self):
4545
    # extra beparams
4546
    if self.op.beparams:
4547
      # fill the beparams dict
4548
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4549

    
4550
  def ExpandNames(self):
4551
    self._ExpandAndLockInstance()
4552

    
4553
  def BuildHooksEnv(self):
4554
    """Build hooks env.
4555

4556
    This runs on master, primary and secondary nodes of the instance.
4557

4558
    """
4559
    env = {
4560
      "FORCE": self.op.force,
4561
      }
4562
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4563
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4564
    return env, nl, nl
4565

    
4566
  def CheckPrereq(self):
4567
    """Check prerequisites.
4568

4569
    This checks that the instance is in the cluster.
4570

4571
    """
4572
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4573
    assert self.instance is not None, \
4574
      "Cannot retrieve locked instance %s" % self.op.instance_name
4575

    
4576
    # extra hvparams
4577
    if self.op.hvparams:
4578
      # check hypervisor parameter syntax (locally)
4579
      cluster = self.cfg.GetClusterInfo()
4580
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4581
      filled_hvp = cluster.FillHV(instance)
4582
      filled_hvp.update(self.op.hvparams)
4583
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4584
      hv_type.CheckParameterSyntax(filled_hvp)
4585
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4586

    
4587
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4588

    
4589
    if self.primary_offline and self.op.ignore_offline_nodes:
4590
      self.proc.LogWarning("Ignoring offline primary node")
4591

    
4592
      if self.op.hvparams or self.op.beparams:
4593
        self.proc.LogWarning("Overridden parameters are ignored")
4594
    else:
4595
      _CheckNodeOnline(self, instance.primary_node)
4596

    
4597
      bep = self.cfg.GetClusterInfo().FillBE(instance)
4598

    
4599
      # check bridges existence
4600
      _CheckInstanceBridgesExist(self, instance)
4601

    
4602
      remote_info = self.rpc.call_instance_info(instance.primary_node,
4603
                                                instance.name,
4604
                                                instance.hypervisor)
4605
      remote_info.Raise("Error checking node %s" % instance.primary_node,
4606
                        prereq=True, ecode=errors.ECODE_ENVIRON)
4607
      if not remote_info.payload: # not running already
4608
        _CheckNodeFreeMemory(self, instance.primary_node,
4609
                             "starting instance %s" % instance.name,
4610
                             bep[constants.BE_MEMORY], instance.hypervisor)
4611

    
4612
  def Exec(self, feedback_fn):
4613
    """Start the instance.
4614

4615
    """
4616
    instance = self.instance
4617
    force = self.op.force
4618

    
4619
    self.cfg.MarkInstanceUp(instance.name)
4620

    
4621
    if self.primary_offline:
4622
      assert self.op.ignore_offline_nodes
4623
      self.proc.LogInfo("Primary node offline, marked instance as started")
4624
    else:
4625
      node_current = instance.primary_node
4626

    
4627
      _StartInstanceDisks(self, instance, force)
4628

    
4629
      result = self.rpc.call_instance_start(node_current, instance,
4630
                                            self.op.hvparams, self.op.beparams)
4631
      msg = result.fail_msg
4632
      if msg:
4633
        _ShutdownInstanceDisks(self, instance)
4634
        raise errors.OpExecError("Could not start instance: %s" % msg)
4635

    
4636

    
4637
class LURebootInstance(LogicalUnit):
4638
  """Reboot an instance.
4639

4640
  """
4641
  HPATH = "instance-reboot"
4642
  HTYPE = constants.HTYPE_INSTANCE
4643
  _OP_PARAMS = [
4644
    _PInstanceName,
4645
    ("ignore_secondaries", False, ht.TBool),
4646
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4647
    _PShutdownTimeout,
4648
    ]
4649
  REQ_BGL = False
4650

    
4651
  def ExpandNames(self):
4652
    self._ExpandAndLockInstance()
4653

    
4654
  def BuildHooksEnv(self):
4655
    """Build hooks env.
4656

4657
    This runs on master, primary and secondary nodes of the instance.
4658

4659
    """
4660
    env = {
4661
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4662
      "REBOOT_TYPE": self.op.reboot_type,
4663
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4664
      }
4665
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4666
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4667
    return env, nl, nl
4668

    
4669
  def CheckPrereq(self):
4670
    """Check prerequisites.
4671

4672
    This checks that the instance is in the cluster.
4673

4674
    """
4675
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4676
    assert self.instance is not None, \
4677
      "Cannot retrieve locked instance %s" % self.op.instance_name
4678

    
4679
    _CheckNodeOnline(self, instance.primary_node)
4680

    
4681
    # check bridges existence
4682
    _CheckInstanceBridgesExist(self, instance)
4683

    
4684
  def Exec(self, feedback_fn):
4685
    """Reboot the instance.
4686

4687
    """
4688
    instance = self.instance
4689
    ignore_secondaries = self.op.ignore_secondaries
4690
    reboot_type = self.op.reboot_type
4691

    
4692
    node_current = instance.primary_node
4693

    
4694
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4695
                       constants.INSTANCE_REBOOT_HARD]:
4696
      for disk in instance.disks:
4697
        self.cfg.SetDiskID(disk, node_current)
4698
      result = self.rpc.call_instance_reboot(node_current, instance,
4699
                                             reboot_type,
4700
                                             self.op.shutdown_timeout)
4701
      result.Raise("Could not reboot instance")
4702
    else:
4703
      result = self.rpc.call_instance_shutdown(node_current, instance,
4704
                                               self.op.shutdown_timeout)
4705
      result.Raise("Could not shutdown instance for full reboot")
4706
      _ShutdownInstanceDisks(self, instance)
4707
      _StartInstanceDisks(self, instance, ignore_secondaries)
4708
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4709
      msg = result.fail_msg
4710
      if msg:
4711
        _ShutdownInstanceDisks(self, instance)
4712
        raise errors.OpExecError("Could not start instance for"
4713
                                 " full reboot: %s" % msg)
4714

    
4715
    self.cfg.MarkInstanceUp(instance.name)
4716

    
4717

    
4718
class LUShutdownInstance(LogicalUnit):
4719
  """Shutdown an instance.
4720

4721
  """
4722
  HPATH = "instance-stop"
4723
  HTYPE = constants.HTYPE_INSTANCE
4724
  _OP_PARAMS = [
4725
    _PInstanceName,
4726
    _PIgnoreOfflineNodes,
4727
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4728
    ]
4729
  REQ_BGL = False
4730

    
4731
  def ExpandNames(self):
4732
    self._ExpandAndLockInstance()
4733

    
4734
  def BuildHooksEnv(self):
4735
    """Build hooks env.
4736

4737
    This runs on master, primary and secondary nodes of the instance.
4738

4739
    """
4740
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4741
    env["TIMEOUT"] = self.op.timeout
4742
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4743
    return env, nl, nl
4744

    
4745
  def CheckPrereq(self):
4746
    """Check prerequisites.
4747

4748
    This checks that the instance is in the cluster.
4749

4750
    """
4751
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4752
    assert self.instance is not None, \
4753
      "Cannot retrieve locked instance %s" % self.op.instance_name
4754

    
4755
    self.primary_offline = \
4756
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
4757

    
4758
    if self.primary_offline and self.op.ignore_offline_nodes:
4759
      self.proc.LogWarning("Ignoring offline primary node")
4760
    else:
4761
      _CheckNodeOnline(self, self.instance.primary_node)
4762

    
4763
  def Exec(self, feedback_fn):
4764
    """Shutdown the instance.
4765

4766
    """
4767
    instance = self.instance
4768
    node_current = instance.primary_node
4769
    timeout = self.op.timeout
4770

    
4771
    self.cfg.MarkInstanceDown(instance.name)
4772

    
4773
    if self.primary_offline:
4774
      assert self.op.ignore_offline_nodes
4775
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
4776
    else:
4777
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4778
      msg = result.fail_msg
4779
      if msg:
4780
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4781

    
4782
      _ShutdownInstanceDisks(self, instance)
4783

    
4784

    
4785
class LUReinstallInstance(LogicalUnit):
4786
  """Reinstall an instance.
4787

4788
  """
4789
  HPATH = "instance-reinstall"
4790
  HTYPE = constants.HTYPE_INSTANCE
4791
  _OP_PARAMS = [
4792
    _PInstanceName,
4793
    ("os_type", None, ht.TMaybeString),
4794
    ("force_variant", False, ht.TBool),
4795
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
4796
    ]
4797
  REQ_BGL = False
4798

    
4799
  def ExpandNames(self):
4800
    self._ExpandAndLockInstance()
4801

    
4802
  def BuildHooksEnv(self):
4803
    """Build hooks env.
4804

4805
    This runs on master, primary and secondary nodes of the instance.
4806

4807
    """
4808
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4809
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4810
    return env, nl, nl
4811

    
4812
  def CheckPrereq(self):
4813
    """Check prerequisites.
4814

4815
    This checks that the instance is in the cluster and is not running.
4816

4817
    """
4818
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4819
    assert instance is not None, \
4820
      "Cannot retrieve locked instance %s" % self.op.instance_name
4821
    _CheckNodeOnline(self, instance.primary_node)
4822

    
4823
    if instance.disk_template == constants.DT_DISKLESS:
4824
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4825
                                 self.op.instance_name,
4826
                                 errors.ECODE_INVAL)
4827
    _CheckInstanceDown(self, instance, "cannot reinstall")
4828

    
4829
    if self.op.os_type is not None:
4830
      # OS verification
4831
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4832
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4833
      instance_os = self.op.os_type
4834
    else:
4835
      instance_os = instance.os
4836

    
4837
    nodelist = list(instance.all_nodes)
4838

    
4839
    if self.op.osparams:
4840
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
4841
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
4842
      self.os_inst = i_osdict # the new dict (without defaults)
4843
    else:
4844
      self.os_inst = None
4845

    
4846
    self.instance = instance
4847

    
4848
  def Exec(self, feedback_fn):
4849
    """Reinstall the instance.
4850

4851
    """
4852
    inst = self.instance
4853

    
4854
    if self.op.os_type is not None:
4855
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4856
      inst.os = self.op.os_type
4857
      # Write to configuration
4858
      self.cfg.Update(inst, feedback_fn)
4859

    
4860
    _StartInstanceDisks(self, inst, None)
4861
    try:
4862
      feedback_fn("Running the instance OS create scripts...")
4863
      # FIXME: pass debug option from opcode to backend
4864
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4865
                                             self.op.debug_level,
4866
                                             osparams=self.os_inst)
4867
      result.Raise("Could not install OS for instance %s on node %s" %
4868
                   (inst.name, inst.primary_node))
4869
    finally:
4870
      _ShutdownInstanceDisks(self, inst)
4871

    
4872

    
4873
class LURecreateInstanceDisks(LogicalUnit):
4874
  """Recreate an instance's missing disks.
4875

4876
  """
4877
  HPATH = "instance-recreate-disks"
4878
  HTYPE = constants.HTYPE_INSTANCE
4879
  _OP_PARAMS = [
4880
    _PInstanceName,
4881
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
4882
    ]
4883
  REQ_BGL = False
4884

    
4885
  def ExpandNames(self):
4886
    self._ExpandAndLockInstance()
4887

    
4888
  def BuildHooksEnv(self):
4889
    """Build hooks env.
4890

4891
    This runs on master, primary and secondary nodes of the instance.
4892

4893
    """
4894
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4895
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4896
    return env, nl, nl
4897

    
4898
  def CheckPrereq(self):
4899
    """Check prerequisites.
4900

4901
    This checks that the instance is in the cluster and is not running.
4902

4903
    """
4904
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4905
    assert instance is not None, \
4906
      "Cannot retrieve locked instance %s" % self.op.instance_name
4907
    _CheckNodeOnline(self, instance.primary_node)
4908

    
4909
    if instance.disk_template == constants.DT_DISKLESS:
4910
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4911
                                 self.op.instance_name, errors.ECODE_INVAL)
4912
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4913

    
4914
    if not self.op.disks:
4915
      self.op.disks = range(len(instance.disks))
4916
    else:
4917
      for idx in self.op.disks:
4918
        if idx >= len(instance.disks):
4919
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4920
                                     errors.ECODE_INVAL)
4921

    
4922
    self.instance = instance
4923

    
4924
  def Exec(self, feedback_fn):
4925
    """Recreate the disks.
4926

4927
    """
4928
    to_skip = []
4929
    for idx, _ in enumerate(self.instance.disks):
4930
      if idx not in self.op.disks: # disk idx has not been passed in
4931
        to_skip.append(idx)
4932
        continue
4933

    
4934
    _CreateDisks(self, self.instance, to_skip=to_skip)
4935

    
4936

    
4937
class LURenameInstance(LogicalUnit):
4938
  """Rename an instance.
4939

4940
  """
4941
  HPATH = "instance-rename"
4942
  HTYPE = constants.HTYPE_INSTANCE
4943
  _OP_PARAMS = [
4944
    _PInstanceName,
4945
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
4946
    ("ip_check", False, ht.TBool),
4947
    ("name_check", True, ht.TBool),
4948
    ]
4949

    
4950
  def CheckArguments(self):
4951
    """Check arguments.
4952

4953
    """
4954
    if self.op.ip_check and not self.op.name_check:
4955
      # TODO: make the ip check more flexible and not depend on the name check
4956
      raise errors.OpPrereqError("Cannot do ip check without a name check",
4957
                                 errors.ECODE_INVAL)
4958

    
4959
  def BuildHooksEnv(self):
4960
    """Build hooks env.
4961

4962
    This runs on master, primary and secondary nodes of the instance.
4963

4964
    """
4965
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4966
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4967
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4968
    return env, nl, nl
4969

    
4970
  def CheckPrereq(self):
4971
    """Check prerequisites.
4972

4973
    This checks that the instance is in the cluster and is not running.
4974

4975
    """
4976
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4977
                                                self.op.instance_name)
4978
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4979
    assert instance is not None
4980
    _CheckNodeOnline(self, instance.primary_node)
4981
    _CheckInstanceDown(self, instance, "cannot rename")
4982
    self.instance = instance
4983

    
4984
    new_name = self.op.new_name
4985
    if self.op.name_check:
4986
      hostname = netutils.GetHostname(name=new_name)
4987
      new_name = self.op.new_name = hostname.name
4988
      if (self.op.ip_check and
4989
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4990
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4991
                                   (hostname.ip, new_name),
4992
                                   errors.ECODE_NOTUNIQUE)
4993

    
4994
    instance_list = self.cfg.GetInstanceList()
4995
    if new_name in instance_list:
4996
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4997
                                 new_name, errors.ECODE_EXISTS)
4998

    
4999
  def Exec(self, feedback_fn):
5000
    """Reinstall the instance.
5001

5002
    """
5003
    inst = self.instance
5004
    old_name = inst.name
5005

    
5006
    if inst.disk_template == constants.DT_FILE:
5007
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5008

    
5009
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5010
    # Change the instance lock. This is definitely safe while we hold the BGL
5011
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5012
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5013

    
5014
    # re-read the instance from the configuration after rename
5015
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5016

    
5017
    if inst.disk_template == constants.DT_FILE:
5018
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5019
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5020
                                                     old_file_storage_dir,
5021
                                                     new_file_storage_dir)
5022
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5023
                   " (but the instance has been renamed in Ganeti)" %
5024
                   (inst.primary_node, old_file_storage_dir,
5025
                    new_file_storage_dir))
5026

    
5027
    _StartInstanceDisks(self, inst, None)
5028
    try:
5029
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5030
                                                 old_name, self.op.debug_level)
5031
      msg = result.fail_msg
5032
      if msg:
5033
        msg = ("Could not run OS rename script for instance %s on node %s"
5034
               " (but the instance has been renamed in Ganeti): %s" %
5035
               (inst.name, inst.primary_node, msg))
5036
        self.proc.LogWarning(msg)
5037
    finally:
5038
      _ShutdownInstanceDisks(self, inst)
5039

    
5040
    return inst.name
5041

    
5042

    
5043
class LURemoveInstance(LogicalUnit):
5044
  """Remove an instance.
5045

5046
  """
5047
  HPATH = "instance-remove"
5048
  HTYPE = constants.HTYPE_INSTANCE
5049
  _OP_PARAMS = [
5050
    _PInstanceName,
5051
    ("ignore_failures", False, ht.TBool),
5052
    _PShutdownTimeout,
5053
    ]
5054
  REQ_BGL = False
5055

    
5056
  def ExpandNames(self):
5057
    self._ExpandAndLockInstance()
5058
    self.needed_locks[locking.LEVEL_NODE] = []
5059
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5060

    
5061
  def DeclareLocks(self, level):
5062
    if level == locking.LEVEL_NODE:
5063
      self._LockInstancesNodes()
5064

    
5065
  def BuildHooksEnv(self):
5066
    """Build hooks env.
5067

5068
    This runs on master, primary and secondary nodes of the instance.
5069

5070
    """
5071
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5072
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5073
    nl = [self.cfg.GetMasterNode()]
5074
    nl_post = list(self.instance.all_nodes) + nl
5075
    return env, nl, nl_post
5076

    
5077
  def CheckPrereq(self):
5078
    """Check prerequisites.
5079

5080
    This checks that the instance is in the cluster.
5081

5082
    """
5083
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5084
    assert self.instance is not None, \
5085
      "Cannot retrieve locked instance %s" % self.op.instance_name
5086

    
5087
  def Exec(self, feedback_fn):
5088
    """Remove the instance.
5089

5090
    """
5091
    instance = self.instance
5092
    logging.info("Shutting down instance %s on node %s",
5093
                 instance.name, instance.primary_node)
5094

    
5095
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5096
                                             self.op.shutdown_timeout)
5097
    msg = result.fail_msg
5098
    if msg:
5099
      if self.op.ignore_failures:
5100
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5101
      else:
5102
        raise errors.OpExecError("Could not shutdown instance %s on"
5103
                                 " node %s: %s" %
5104
                                 (instance.name, instance.primary_node, msg))
5105

    
5106
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5107

    
5108

    
5109
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5110
  """Utility function to remove an instance.
5111

5112
  """
5113
  logging.info("Removing block devices for instance %s", instance.name)
5114

    
5115
  if not _RemoveDisks(lu, instance):
5116
    if not ignore_failures:
5117
      raise errors.OpExecError("Can't remove instance's disks")
5118
    feedback_fn("Warning: can't remove instance's disks")
5119

    
5120
  logging.info("Removing instance %s out of cluster config", instance.name)
5121

    
5122
  lu.cfg.RemoveInstance(instance.name)
5123

    
5124
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5125
    "Instance lock removal conflict"
5126

    
5127
  # Remove lock for the instance
5128
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5129

    
5130

    
5131
class LUQueryInstances(NoHooksLU):
5132
  """Logical unit for querying instances.
5133

5134
  """
5135
  # pylint: disable-msg=W0142
5136
  _OP_PARAMS = [
5137
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5138
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5139
    ("use_locking", False, ht.TBool),
5140
    ]
5141
  REQ_BGL = False
5142
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5143
                    "serial_no", "ctime", "mtime", "uuid"]
5144
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5145
                                    "admin_state",
5146
                                    "disk_template", "ip", "mac", "bridge",
5147
                                    "nic_mode", "nic_link",
5148
                                    "sda_size", "sdb_size", "vcpus", "tags",
5149
                                    "network_port", "beparams",
5150
                                    r"(disk)\.(size)/([0-9]+)",
5151
                                    r"(disk)\.(sizes)", "disk_usage",
5152
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5153
                                    r"(nic)\.(bridge)/([0-9]+)",
5154
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5155
                                    r"(disk|nic)\.(count)",
5156
                                    "hvparams", "custom_hvparams",
5157
                                    "custom_beparams", "custom_nicparams",
5158
                                    ] + _SIMPLE_FIELDS +
5159
                                  ["hv/%s" % name
5160
                                   for name in constants.HVS_PARAMETERS
5161
                                   if name not in constants.HVC_GLOBALS] +
5162
                                  ["be/%s" % name
5163
                                   for name in constants.BES_PARAMETERS])
5164
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5165
                                   "oper_ram",
5166
                                   "oper_vcpus",
5167
                                   "status")
5168

    
5169

    
5170
  def CheckArguments(self):
5171
    _CheckOutputFields(static=self._FIELDS_STATIC,
5172
                       dynamic=self._FIELDS_DYNAMIC,
5173
                       selected=self.op.output_fields)
5174

    
5175
  def ExpandNames(self):
5176
    self.needed_locks = {}
5177
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5178
    self.share_locks[locking.LEVEL_NODE] = 1
5179

    
5180
    if self.op.names:
5181
      self.wanted = _GetWantedInstances(self, self.op.names)
5182
    else:
5183
      self.wanted = locking.ALL_SET
5184

    
5185
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5186
    self.do_locking = self.do_node_query and self.op.use_locking
5187
    if self.do_locking:
5188
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5189
      self.needed_locks[locking.LEVEL_NODE] = []
5190
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5191

    
5192
  def DeclareLocks(self, level):
5193
    if level == locking.LEVEL_NODE and self.do_locking:
5194
      self._LockInstancesNodes()
5195

    
5196
  def Exec(self, feedback_fn):
5197
    """Computes the list of nodes and their attributes.
5198

5199
    """
5200
    # pylint: disable-msg=R0912
5201
    # way too many branches here
5202
    all_info = self.cfg.GetAllInstancesInfo()
5203
    if self.wanted == locking.ALL_SET:
5204
      # caller didn't specify instance names, so ordering is not important
5205
      if self.do_locking:
5206
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5207
      else:
5208
        instance_names = all_info.keys()
5209
      instance_names = utils.NiceSort(instance_names)
5210
    else:
5211
      # caller did specify names, so we must keep the ordering
5212
      if self.do_locking:
5213
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5214
      else:
5215
        tgt_set = all_info.keys()
5216
      missing = set(self.wanted).difference(tgt_set)
5217
      if missing:
5218
        raise errors.OpExecError("Some instances were removed before"
5219
                                 " retrieving their data: %s" % missing)
5220
      instance_names = self.wanted
5221

    
5222
    instance_list = [all_info[iname] for iname in instance_names]
5223

    
5224
    # begin data gathering
5225

    
5226
    nodes = frozenset([inst.primary_node for inst in instance_list])
5227
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5228

    
5229
    bad_nodes = []
5230
    off_nodes = []
5231
    if self.do_node_query:
5232
      live_data = {}
5233
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5234
      for name in nodes:
5235
        result = node_data[name]
5236
        if result.offline:
5237
          # offline nodes will be in both lists
5238
          off_nodes.append(name)
5239
        if result.fail_msg:
5240
          bad_nodes.append(name)
5241
        else:
5242
          if result.payload:
5243
            live_data.update(result.payload)
5244
          # else no instance is alive
5245
    else:
5246
      live_data = dict([(name, {}) for name in instance_names])
5247

    
5248
    # end data gathering
5249

    
5250
    HVPREFIX = "hv/"
5251
    BEPREFIX = "be/"
5252
    output = []
5253
    cluster = self.cfg.GetClusterInfo()
5254
    for instance in instance_list:
5255
      iout = []
5256
      i_hv = cluster.FillHV(instance, skip_globals=True)
5257
      i_be = cluster.FillBE(instance)
5258
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5259
      for field in self.op.output_fields:
5260
        st_match = self._FIELDS_STATIC.Matches(field)
5261
        if field in self._SIMPLE_FIELDS:
5262
          val = getattr(instance, field)
5263
        elif field == "pnode":
5264
          val = instance.primary_node
5265
        elif field == "snodes":
5266
          val = list(instance.secondary_nodes)
5267
        elif field == "admin_state":
5268
          val = instance.admin_up
5269
        elif field == "oper_state":
5270
          if instance.primary_node in bad_nodes:
5271
            val = None
5272
          else:
5273
            val = bool(live_data.get(instance.name))
5274
        elif field == "status":
5275
          if instance.primary_node in off_nodes:
5276
            val = "ERROR_nodeoffline"
5277
          elif instance.primary_node in bad_nodes:
5278
            val = "ERROR_nodedown"
5279
          else:
5280
            running = bool(live_data.get(instance.name))
5281
            if running:
5282
              if instance.admin_up:
5283
                val = "running"
5284
              else:
5285
                val = "ERROR_up"
5286
            else:
5287
              if instance.admin_up:
5288
                val = "ERROR_down"
5289
              else:
5290
                val = "ADMIN_down"
5291
        elif field == "oper_ram":
5292
          if instance.primary_node in bad_nodes:
5293
            val = None
5294
          elif instance.name in live_data:
5295
            val = live_data[instance.name].get("memory", "?")
5296
          else:
5297
            val = "-"
5298
        elif field == "oper_vcpus":
5299
          if instance.primary_node in bad_nodes:
5300
            val = None
5301
          elif instance.name in live_data:
5302
            val = live_data[instance.name].get("vcpus", "?")
5303
          else:
5304
            val = "-"
5305
        elif field == "vcpus":
5306
          val = i_be[constants.BE_VCPUS]
5307
        elif field == "disk_template":
5308
          val = instance.disk_template
5309
        elif field == "ip":
5310
          if instance.nics:
5311
            val = instance.nics[0].ip
5312
          else:
5313
            val = None
5314
        elif field == "nic_mode":
5315
          if instance.nics:
5316
            val = i_nicp[0][constants.NIC_MODE]
5317
          else:
5318
            val = None
5319
        elif field == "nic_link":
5320
          if instance.nics:
5321
            val = i_nicp[0][constants.NIC_LINK]
5322
          else:
5323
            val = None
5324
        elif field == "bridge":
5325
          if (instance.nics and
5326
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5327
            val = i_nicp[0][constants.NIC_LINK]
5328
          else:
5329
            val = None
5330
        elif field == "mac":
5331
          if instance.nics:
5332
            val = instance.nics[0].mac
5333
          else:
5334
            val = None
5335
        elif field == "custom_nicparams":
5336
          val = [nic.nicparams for nic in instance.nics]
5337
        elif field == "sda_size" or field == "sdb_size":
5338
          idx = ord(field[2]) - ord('a')
5339
          try:
5340
            val = instance.FindDisk(idx).size
5341
          except errors.OpPrereqError:
5342
            val = None
5343
        elif field == "disk_usage": # total disk usage per node
5344
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5345
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5346
        elif field == "tags":
5347
          val = list(instance.GetTags())
5348
        elif field == "custom_hvparams":
5349
          val = instance.hvparams # not filled!
5350
        elif field == "hvparams":
5351
          val = i_hv
5352
        elif (field.startswith(HVPREFIX) and
5353
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5354
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5355
          val = i_hv.get(field[len(HVPREFIX):], None)
5356
        elif field == "custom_beparams":
5357
          val = instance.beparams
5358
        elif field == "beparams":
5359
          val = i_be
5360
        elif (field.startswith(BEPREFIX) and
5361
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5362
          val = i_be.get(field[len(BEPREFIX):], None)
5363
        elif st_match and st_match.groups():
5364
          # matches a variable list
5365
          st_groups = st_match.groups()
5366
          if st_groups and st_groups[0] == "disk":
5367
            if st_groups[1] == "count":
5368
              val = len(instance.disks)
5369
            elif st_groups[1] == "sizes":
5370
              val = [disk.size for disk in instance.disks]
5371
            elif st_groups[1] == "size":
5372
              try:
5373
                val = instance.FindDisk(st_groups[2]).size
5374
              except errors.OpPrereqError:
5375
                val = None
5376
            else:
5377
              assert False, "Unhandled disk parameter"
5378
          elif st_groups[0] == "nic":
5379
            if st_groups[1] == "count":
5380
              val = len(instance.nics)
5381
            elif st_groups[1] == "macs":
5382
              val = [nic.mac for nic in instance.nics]
5383
            elif st_groups[1] == "ips":
5384
              val = [nic.ip for nic in instance.nics]
5385
            elif st_groups[1] == "modes":
5386
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5387
            elif st_groups[1] == "links":
5388
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5389
            elif st_groups[1] == "bridges":
5390
              val = []
5391
              for nicp in i_nicp:
5392
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5393
                  val.append(nicp[constants.NIC_LINK])
5394
                else:
5395
                  val.append(None)
5396
            else:
5397
              # index-based item
5398
              nic_idx = int(st_groups[2])
5399
              if nic_idx >= len(instance.nics):
5400
                val = None
5401
              else:
5402
                if st_groups[1] == "mac":
5403
                  val = instance.nics[nic_idx].mac
5404
                elif st_groups[1] == "ip":
5405
                  val = instance.nics[nic_idx].ip
5406
                elif st_groups[1] == "mode":
5407
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5408
                elif st_groups[1] == "link":
5409
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5410
                elif st_groups[1] == "bridge":
5411
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5412
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5413
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5414
                  else:
5415
                    val = None
5416
                else:
5417
                  assert False, "Unhandled NIC parameter"
5418
          else:
5419
            assert False, ("Declared but unhandled variable parameter '%s'" %
5420
                           field)
5421
        else:
5422
          assert False, "Declared but unhandled parameter '%s'" % field
5423
        iout.append(val)
5424
      output.append(iout)
5425

    
5426
    return output
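

# LUQueryInstances.Exec above resolves indexed fields such as
# "nic.mac/0" by matching them against the regex patterns registered in
# its FieldSet and then dispatching on the captured groups.  The sketch
# below shows that parsing step in isolation; the helper name is
# hypothetical and only the NIC pattern from the class is used.
def _ExampleParseIndexedNicField(field):
  """Parse an indexed NIC field such as "nic.mac/0" (sketch).

  @return: a (kind, attribute, index) tuple, or None if the field does
      not match the illustrative pattern

  """
  fields = utils.FieldSet(r"(nic)\.(mac|ip|mode|link)/([0-9]+)")
  match = fields.Matches(field)
  if not match:
    return None
  kind, attr, idx = match.groups()
  return (kind, attr, int(idx))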
5427

    
5428

    
5429
class LUFailoverInstance(LogicalUnit):
5430
  """Failover an instance.
5431

5432
  """
5433
  HPATH = "instance-failover"
5434
  HTYPE = constants.HTYPE_INSTANCE
5435
  _OP_PARAMS = [
5436
    _PInstanceName,
5437
    ("ignore_consistency", False, ht.TBool),
5438
    _PShutdownTimeout,
5439
    ]
5440
  REQ_BGL = False
5441

    
5442
  def ExpandNames(self):
5443
    self._ExpandAndLockInstance()
5444
    self.needed_locks[locking.LEVEL_NODE] = []
5445
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5446

    
5447
  def DeclareLocks(self, level):
5448
    if level == locking.LEVEL_NODE:
5449
      self._LockInstancesNodes()
5450

    
5451
  def BuildHooksEnv(self):
5452
    """Build hooks env.
5453

5454
    This runs on master, primary and secondary nodes of the instance.
5455

5456
    """
5457
    instance = self.instance
5458
    source_node = instance.primary_node
5459
    target_node = instance.secondary_nodes[0]
5460
    env = {
5461
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5462
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5463
      "OLD_PRIMARY": source_node,
5464
      "OLD_SECONDARY": target_node,
5465
      "NEW_PRIMARY": target_node,
5466
      "NEW_SECONDARY": source_node,
5467
      }
5468
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5469
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5470
    nl_post = list(nl)
5471
    nl_post.append(source_node)
5472
    return env, nl, nl_post
5473

    
5474
  def CheckPrereq(self):
5475
    """Check prerequisites.
5476

5477
    This checks that the instance is in the cluster.
5478

5479
    """
5480
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5481
    assert self.instance is not None, \
5482
      "Cannot retrieve locked instance %s" % self.op.instance_name
5483

    
5484
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5485
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5486
      raise errors.OpPrereqError("Instance's disk layout is not"
5487
                                 " network mirrored, cannot failover.",
5488
                                 errors.ECODE_STATE)
5489

    
5490
    secondary_nodes = instance.secondary_nodes
5491
    if not secondary_nodes:
5492
      raise errors.ProgrammerError("no secondary node but using "
5493
                                   "a mirrored disk template")
5494

    
5495
    target_node = secondary_nodes[0]
5496
    _CheckNodeOnline(self, target_node)
5497
    _CheckNodeNotDrained(self, target_node)
5498
    if instance.admin_up:
5499
      # check memory requirements on the secondary node
5500
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5501
                           instance.name, bep[constants.BE_MEMORY],
5502
                           instance.hypervisor)
5503
    else:
5504
      self.LogInfo("Not checking memory on the secondary node as"
5505
                   " instance will not be started")
5506

    
5507
    # check bridge existence
5508
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5509

    
5510
  def Exec(self, feedback_fn):
5511
    """Failover an instance.
5512

5513
    The failover is done by shutting it down on its present node and
5514
    starting it on the secondary.
5515

5516
    """
5517
    instance = self.instance
5518
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5519

    
5520
    source_node = instance.primary_node
5521
    target_node = instance.secondary_nodes[0]
5522

    
5523
    if instance.admin_up:
5524
      feedback_fn("* checking disk consistency between source and target")
5525
      for dev in instance.disks:
5526
        # for drbd, these are drbd over lvm
5527
        if not _CheckDiskConsistency(self, dev, target_node, False):
5528
          if not self.op.ignore_consistency:
5529
            raise errors.OpExecError("Disk %s is degraded on target node,"
5530
                                     " aborting failover." % dev.iv_name)
5531
    else:
5532
      feedback_fn("* not checking disk consistency as instance is not running")
5533

    
5534
    feedback_fn("* shutting down instance on source node")
5535
    logging.info("Shutting down instance %s on node %s",
5536
                 instance.name, source_node)
5537

    
5538
    result = self.rpc.call_instance_shutdown(source_node, instance,
5539
                                             self.op.shutdown_timeout)
5540
    msg = result.fail_msg
5541
    if msg:
5542
      if self.op.ignore_consistency or primary_node.offline:
5543
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5544
                             " Proceeding anyway. Please make sure node"
5545
                             " %s is down. Error details: %s",
5546
                             instance.name, source_node, source_node, msg)
5547
      else:
5548
        raise errors.OpExecError("Could not shutdown instance %s on"
5549
                                 " node %s: %s" %
5550
                                 (instance.name, source_node, msg))
5551

    
5552
    feedback_fn("* deactivating the instance's disks on source node")
5553
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5554
      raise errors.OpExecError("Can't shut down the instance's disks.")
5555

    
5556
    instance.primary_node = target_node
5557
    # distribute new instance config to the other nodes
5558
    self.cfg.Update(instance, feedback_fn)
5559

    
5560
    # Only start the instance if it's marked as up
5561
    if instance.admin_up:
5562
      feedback_fn("* activating the instance's disks on target node")
5563
      logging.info("Starting instance %s on node %s",
5564
                   instance.name, target_node)
5565

    
5566
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5567
                                           ignore_secondaries=True)
5568
      if not disks_ok:
5569
        _ShutdownInstanceDisks(self, instance)
5570
        raise errors.OpExecError("Can't activate the instance's disks")
5571

    
5572
      feedback_fn("* starting the instance on the target node")
5573
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5574
      msg = result.fail_msg
5575
      if msg:
5576
        _ShutdownInstanceDisks(self, instance)
5577
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5578
                                 (instance.name, target_node, msg))
5579

    
5580

    
5581
class LUMigrateInstance(LogicalUnit):
5582
  """Migrate an instance.
5583

5584
  This is migration without shutting down, compared to the failover,
5585
  which is done with shutdown.
5586

5587
  """
5588
  HPATH = "instance-migrate"
5589
  HTYPE = constants.HTYPE_INSTANCE
5590
  _OP_PARAMS = [
5591
    _PInstanceName,
5592
    _PMigrationMode,
5593
    _PMigrationLive,
5594
    ("cleanup", False, ht.TBool),
5595
    ]
5596

    
5597
  REQ_BGL = False
5598

    
5599
  def ExpandNames(self):
5600
    self._ExpandAndLockInstance()
5601

    
5602
    self.needed_locks[locking.LEVEL_NODE] = []
5603
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5604

    
5605
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5606
                                       self.op.cleanup)
5607
    self.tasklets = [self._migrater]
5608

    
5609
  def DeclareLocks(self, level):
5610
    if level == locking.LEVEL_NODE:
5611
      self._LockInstancesNodes()
5612

    
5613
  def BuildHooksEnv(self):
5614
    """Build hooks env.
5615

5616
    This runs on master, primary and secondary nodes of the instance.
5617

5618
    """
5619
    instance = self._migrater.instance
5620
    source_node = instance.primary_node
5621
    target_node = instance.secondary_nodes[0]
5622
    env = _BuildInstanceHookEnvByObject(self, instance)
5623
    env["MIGRATE_LIVE"] = self._migrater.live
5624
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5625
    env.update({
5626
        "OLD_PRIMARY": source_node,
5627
        "OLD_SECONDARY": target_node,
5628
        "NEW_PRIMARY": target_node,
5629
        "NEW_SECONDARY": source_node,
5630
        })
5631
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5632
    nl_post = list(nl)
5633
    nl_post.append(source_node)
5634
    return env, nl, nl_post


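# Illustrative sketch (hypothetical helper, not part of the Ganeti API and not
# used by any LU): the OLD_*/NEW_* hook variables built in
# LUMigrateInstance.BuildHooksEnv above simply encode the role swap between
# the two nodes taking part in a migration.
def _ExampleMigrationRoleSwap(source_node, target_node):
  """Return the node-role part of a migration hook environment.

  After a successful migration the old secondary becomes the new primary
  and vice versa, which is all the dictionary below expresses.

  """
  return {
    "OLD_PRIMARY": source_node,
    "OLD_SECONDARY": target_node,
    "NEW_PRIMARY": target_node,
    "NEW_SECONDARY": source_node,
    }

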
class LUMoveInstance(LogicalUnit):
5638
  """Move an instance by data-copying.
5639

5640
  """
5641
  HPATH = "instance-move"
5642
  HTYPE = constants.HTYPE_INSTANCE
5643
  _OP_PARAMS = [
5644
    _PInstanceName,
5645
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5646
    _PShutdownTimeout,
5647
    ]
5648
  REQ_BGL = False
5649

    
5650
  def ExpandNames(self):
5651
    self._ExpandAndLockInstance()
5652
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5653
    self.op.target_node = target_node
5654
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5655
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5656

    
5657
  def DeclareLocks(self, level):
5658
    if level == locking.LEVEL_NODE:
5659
      self._LockInstancesNodes(primary_only=True)
5660

    
5661
  def BuildHooksEnv(self):
5662
    """Build hooks env.
5663

5664
    This runs on master, primary and secondary nodes of the instance.
5665

5666
    """
5667
    env = {
5668
      "TARGET_NODE": self.op.target_node,
5669
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5670
      }
5671
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5672
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5673
                                       self.op.target_node]
5674
    return env, nl, nl
5675

    
5676
  def CheckPrereq(self):
5677
    """Check prerequisites.
5678

5679
    This checks that the instance is in the cluster.
5680

5681
    """
5682
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5683
    assert self.instance is not None, \
5684
      "Cannot retrieve locked instance %s" % self.op.instance_name
5685

    
5686
    node = self.cfg.GetNodeInfo(self.op.target_node)
5687
    assert node is not None, \
5688
      "Cannot retrieve locked node %s" % self.op.target_node
5689

    
5690
    self.target_node = target_node = node.name
5691

    
5692
    if target_node == instance.primary_node:
5693
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5694
                                 (instance.name, target_node),
5695
                                 errors.ECODE_STATE)
5696

    
5697
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5698

    
5699
    for idx, dsk in enumerate(instance.disks):
5700
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5701
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5702
                                   " cannot copy" % idx, errors.ECODE_STATE)
5703

    
5704
    _CheckNodeOnline(self, target_node)
5705
    _CheckNodeNotDrained(self, target_node)
5706

    
5707
    if instance.admin_up:
5708
      # check memory requirements on the secondary node
5709
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5710
                           instance.name, bep[constants.BE_MEMORY],
5711
                           instance.hypervisor)
5712
    else:
5713
      self.LogInfo("Not checking memory on the secondary node as"
5714
                   " instance will not be started")
5715

    
5716
    # check bridge existence
5717
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5718

    
5719
  def Exec(self, feedback_fn):
5720
    """Move an instance.
5721

5722
    The move is done by shutting it down on its present node, copying
5723
    the data over (slow) and starting it on the new node.
5724

5725
    """
5726
    instance = self.instance
5727

    
5728
    source_node = instance.primary_node
5729
    target_node = self.target_node
5730

    
5731
    self.LogInfo("Shutting down instance %s on source node %s",
5732
                 instance.name, source_node)
5733

    
5734
    result = self.rpc.call_instance_shutdown(source_node, instance,
5735
                                             self.op.shutdown_timeout)
5736
    msg = result.fail_msg
5737
    if msg:
5738
      if self.op.ignore_consistency:
5739
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5740
                             " Proceeding anyway. Please make sure node"
5741
                             " %s is down. Error details: %s",
5742
                             instance.name, source_node, source_node, msg)
5743
      else:
5744
        raise errors.OpExecError("Could not shutdown instance %s on"
5745
                                 " node %s: %s" %
5746
                                 (instance.name, source_node, msg))
5747

    
5748
    # create the target disks
5749
    try:
5750
      _CreateDisks(self, instance, target_node=target_node)
5751
    except errors.OpExecError:
5752
      self.LogWarning("Device creation failed, reverting...")
5753
      try:
5754
        _RemoveDisks(self, instance, target_node=target_node)
5755
      finally:
5756
        self.cfg.ReleaseDRBDMinors(instance.name)
5757
        raise
5758

    
5759
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5760

    
5761
    errs = []
5762
    # activate, get path, copy the data over
5763
    for idx, disk in enumerate(instance.disks):
5764
      self.LogInfo("Copying data for disk %d", idx)
5765
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5766
                                               instance.name, True)
5767
      if result.fail_msg:
5768
        self.LogWarning("Can't assemble newly created disk %d: %s",
5769
                        idx, result.fail_msg)
5770
        errs.append(result.fail_msg)
5771
        break
5772
      dev_path = result.payload
5773
      result = self.rpc.call_blockdev_export(source_node, disk,
5774
                                             target_node, dev_path,
5775
                                             cluster_name)
5776
      if result.fail_msg:
5777
        self.LogWarning("Can't copy data over for disk %d: %s",
5778
                        idx, result.fail_msg)
5779
        errs.append(result.fail_msg)
5780
        break
5781

    
5782
    if errs:
5783
      self.LogWarning("Some disks failed to copy, aborting")
5784
      try:
5785
        _RemoveDisks(self, instance, target_node=target_node)
5786
      finally:
5787
        self.cfg.ReleaseDRBDMinors(instance.name)
5788
        raise errors.OpExecError("Errors during disk copy: %s" %
5789
                                 (",".join(errs),))
5790

    
5791
    instance.primary_node = target_node
5792
    self.cfg.Update(instance, feedback_fn)
5793

    
5794
    self.LogInfo("Removing the disks on the original node")
5795
    _RemoveDisks(self, instance, target_node=source_node)
5796

    
5797
    # Only start the instance if it's marked as up
5798
    if instance.admin_up:
5799
      self.LogInfo("Starting instance %s on node %s",
5800
                   instance.name, target_node)
5801

    
5802
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5803
                                           ignore_secondaries=True)
5804
      if not disks_ok:
5805
        _ShutdownInstanceDisks(self, instance)
5806
        raise errors.OpExecError("Can't activate the instance's disks")
5807

    
5808
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5809
      msg = result.fail_msg
5810
      if msg:
5811
        _ShutdownInstanceDisks(self, instance)
5812
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5813
                                 (instance.name, target_node, msg))


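# Illustrative sketch (hypothetical helper, not used by LUMoveInstance): the
# disk-copy loop in LUMoveInstance.Exec above assembles each new disk on the
# target node, exports the data from the source, collects any failure message
# and stops at the first broken disk so the partial copy can be rolled back.
def _ExampleCopyDisksCollectingErrors(disk_ops):
  """Run (assemble, export) callables per disk, stopping on the first error.

  @param disk_ops: list of (assemble_fn, export_fn) pairs; each callable
      returns None on success or an error message string on failure
  @return: list of collected error messages (empty means full success)

  """
  errs = []
  for idx, (assemble_fn, export_fn) in enumerate(disk_ops):
    msg = assemble_fn()
    if msg:
      errs.append("disk %d: %s" % (idx, msg))
      break
    msg = export_fn()
    if msg:
      errs.append("disk %d: %s" % (idx, msg))
      break
  return errs

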
class LUMigrateNode(LogicalUnit):
5817
  """Migrate all instances from a node.
5818

5819
  """
5820
  HPATH = "node-migrate"
5821
  HTYPE = constants.HTYPE_NODE
5822
  _OP_PARAMS = [
5823
    _PNodeName,
5824
    _PMigrationMode,
5825
    _PMigrationLive,
5826
    ]
5827
  REQ_BGL = False
5828

    
5829
  def ExpandNames(self):
5830
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5831

    
5832
    self.needed_locks = {
5833
      locking.LEVEL_NODE: [self.op.node_name],
5834
      }
5835

    
5836
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5837

    
5838
    # Create tasklets for migrating instances for all instances on this node
5839
    names = []
5840
    tasklets = []
5841

    
5842
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5843
      logging.debug("Migrating instance %s", inst.name)
5844
      names.append(inst.name)
5845

    
5846
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5847

    
5848
    self.tasklets = tasklets
5849

    
5850
    # Declare instance locks
5851
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5852

    
5853
  def DeclareLocks(self, level):
5854
    if level == locking.LEVEL_NODE:
5855
      self._LockInstancesNodes()
5856

    
5857
  def BuildHooksEnv(self):
5858
    """Build hooks env.
5859

5860
    This runs on the master, the primary and all the secondaries.
5861

5862
    """
5863
    env = {
5864
      "NODE_NAME": self.op.node_name,
5865
      }
5866

    
5867
    nl = [self.cfg.GetMasterNode()]
5868

    
5869
    return (env, nl, nl)


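# Illustrative sketch (hypothetical helper, not used by LUMigrateNode): node
# evacuation by migration boils down to "one TLMigrateInstance tasklet per
# primary instance of the node", which is exactly what ExpandNames above
# builds before declaring the instance locks.
def _ExampleTaskletsForNode(lu, node_name):
  """Return (instance names, tasklets) for migrating a node's primaries.

  """
  names = []
  tasklets = []
  for inst in _GetNodePrimaryInstances(lu.cfg, node_name):
    names.append(inst.name)
    tasklets.append(TLMigrateInstance(lu, inst.name, False))
  return names, tasklets

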
class TLMigrateInstance(Tasklet):
5873
  """Tasklet class for instance migration.
5874

5875
  @type live: boolean
5876
  @ivar live: whether the migration will be done live or non-live;
5877
      this variable is initialized only after CheckPrereq has run
5878

5879
  """
5880
  def __init__(self, lu, instance_name, cleanup):
5881
    """Initializes this class.
5882

5883
    """
5884
    Tasklet.__init__(self, lu)
5885

    
5886
    # Parameters
5887
    self.instance_name = instance_name
5888
    self.cleanup = cleanup
5889
    self.live = False # will be overridden later
5890

    
5891
  def CheckPrereq(self):
5892
    """Check prerequisites.
5893

5894
    This checks that the instance is in the cluster.
5895

5896
    """
5897
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5898
    instance = self.cfg.GetInstanceInfo(instance_name)
5899
    assert instance is not None
5900

    
5901
    if instance.disk_template != constants.DT_DRBD8:
5902
      raise errors.OpPrereqError("Instance's disk layout is not"
5903
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5904

    
5905
    secondary_nodes = instance.secondary_nodes
5906
    if not secondary_nodes:
5907
      raise errors.ConfigurationError("No secondary node but using"
5908
                                      " drbd8 disk template")
5909

    
5910
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5911

    
5912
    target_node = secondary_nodes[0]
5913
    # check memory requirements on the secondary node
5914
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5915
                         instance.name, i_be[constants.BE_MEMORY],
5916
                         instance.hypervisor)
5917

    
5918
    # check bridge existance
5919
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5920

    
5921
    if not self.cleanup:
5922
      _CheckNodeNotDrained(self.lu, target_node)
5923
      result = self.rpc.call_instance_migratable(instance.primary_node,
5924
                                                 instance)
5925
      result.Raise("Can't migrate, please use failover",
5926
                   prereq=True, ecode=errors.ECODE_STATE)
5927

    
5928
    self.instance = instance
5929

    
5930
    if self.lu.op.live is not None and self.lu.op.mode is not None:
5931
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5932
                                 " parameters are accepted",
5933
                                 errors.ECODE_INVAL)
5934
    if self.lu.op.live is not None:
5935
      if self.lu.op.live:
5936
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
5937
      else:
5938
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5939
      # reset the 'live' parameter to None so that repeated
5940
      # invocations of CheckPrereq do not raise an exception
5941
      self.lu.op.live = None
5942
    elif self.lu.op.mode is None:
5943
      # read the default value from the hypervisor
5944
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5945
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5946

    
5947
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5948

    
5949
  def _WaitUntilSync(self):
5950
    """Poll with custom rpc for disk sync.
5951

5952
    This uses our own step-based rpc call.
5953

5954
    """
5955
    self.feedback_fn("* wait until resync is done")
5956
    all_done = False
5957
    while not all_done:
5958
      all_done = True
5959
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5960
                                            self.nodes_ip,
5961
                                            self.instance.disks)
5962
      min_percent = 100
5963
      for node, nres in result.items():
5964
        nres.Raise("Cannot resync disks on node %s" % node)
5965
        node_done, node_percent = nres.payload
5966
        all_done = all_done and node_done
5967
        if node_percent is not None:
5968
          min_percent = min(min_percent, node_percent)
5969
      if not all_done:
5970
        if min_percent < 100:
5971
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5972
        time.sleep(2)
5973

    
5974
  def _EnsureSecondary(self, node):
5975
    """Demote a node to secondary.
5976

5977
    """
5978
    self.feedback_fn("* switching node %s to secondary mode" % node)
5979

    
5980
    for dev in self.instance.disks:
5981
      self.cfg.SetDiskID(dev, node)
5982

    
5983
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5984
                                          self.instance.disks)
5985
    result.Raise("Cannot change disk to secondary on node %s" % node)
5986

    
5987
  def _GoStandalone(self):
5988
    """Disconnect from the network.
5989

5990
    """
5991
    self.feedback_fn("* changing into standalone mode")
5992
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5993
                                               self.instance.disks)
5994
    for node, nres in result.items():
5995
      nres.Raise("Cannot disconnect disks node %s" % node)
5996

    
5997
  def _GoReconnect(self, multimaster):
5998
    """Reconnect to the network.
5999

6000
    """
6001
    if multimaster:
6002
      msg = "dual-master"
6003
    else:
6004
      msg = "single-master"
6005
    self.feedback_fn("* changing disks into %s mode" % msg)
6006
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6007
                                           self.instance.disks,
6008
                                           self.instance.name, multimaster)
6009
    for node, nres in result.items():
6010
      nres.Raise("Cannot change disks config on node %s" % node)
6011

    
6012
  def _ExecCleanup(self):
6013
    """Try to cleanup after a failed migration.
6014

6015
    The cleanup is done by:
6016
      - check that the instance is running only on one node
6017
        (and update the config if needed)
6018
      - change disks on its secondary node to secondary
6019
      - wait until disks are fully synchronized
6020
      - disconnect from the network
6021
      - change disks into single-master mode
6022
      - wait again until disks are fully synchronized
6023

6024
    """
6025
    instance = self.instance
6026
    target_node = self.target_node
6027
    source_node = self.source_node
6028

    
6029
    # check running on only one node
6030
    self.feedback_fn("* checking where the instance actually runs"
6031
                     " (if this hangs, the hypervisor might be in"
6032
                     " a bad state)")
6033
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6034
    for node, result in ins_l.items():
6035
      result.Raise("Can't contact node %s" % node)
6036

    
6037
    runningon_source = instance.name in ins_l[source_node].payload
6038
    runningon_target = instance.name in ins_l[target_node].payload
6039

    
6040
    if runningon_source and runningon_target:
6041
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6042
                               " or the hypervisor is confused. You will have"
6043
                               " to ensure manually that it runs only on one"
6044
                               " and restart this operation.")
6045

    
6046
    if not (runningon_source or runningon_target):
6047
      raise errors.OpExecError("Instance does not seem to be running at all."
6048
                               " In this case, it's safer to repair by"
6049
                               " running 'gnt-instance stop' to ensure disk"
6050
                               " shutdown, and then restarting it.")
6051

    
6052
    if runningon_target:
6053
      # the migration has actually succeeded, we need to update the config
6054
      self.feedback_fn("* instance running on secondary node (%s),"
6055
                       " updating config" % target_node)
6056
      instance.primary_node = target_node
6057
      self.cfg.Update(instance, self.feedback_fn)
6058
      demoted_node = source_node
6059
    else:
6060
      self.feedback_fn("* instance confirmed to be running on its"
6061
                       " primary node (%s)" % source_node)
6062
      demoted_node = target_node
6063

    
6064
    self._EnsureSecondary(demoted_node)
6065
    try:
6066
      self._WaitUntilSync()
6067
    except errors.OpExecError:
6068
      # we ignore here errors, since if the device is standalone, it
6069
      # won't be able to sync
6070
      pass
6071
    self._GoStandalone()
6072
    self._GoReconnect(False)
6073
    self._WaitUntilSync()
6074

    
6075
    self.feedback_fn("* done")
6076

    
6077
  def _RevertDiskStatus(self):
6078
    """Try to revert the disk status after a failed migration.
6079

6080
    """
6081
    target_node = self.target_node
6082
    try:
6083
      self._EnsureSecondary(target_node)
6084
      self._GoStandalone()
6085
      self._GoReconnect(False)
6086
      self._WaitUntilSync()
6087
    except errors.OpExecError, err:
6088
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6089
                         " drives: error '%s'\n"
6090
                         "Please look and recover the instance status" %
6091
                         str(err))
6092

    
6093
  def _AbortMigration(self):
6094
    """Call the hypervisor code to abort a started migration.
6095

6096
    """
6097
    instance = self.instance
6098
    target_node = self.target_node
6099
    migration_info = self.migration_info
6100

    
6101
    abort_result = self.rpc.call_finalize_migration(target_node,
6102
                                                    instance,
6103
                                                    migration_info,
6104
                                                    False)
6105
    abort_msg = abort_result.fail_msg
6106
    if abort_msg:
6107
      logging.error("Aborting migration failed on target node %s: %s",
6108
                    target_node, abort_msg)
6109
      # Don't raise an exception here, as we still have to try to revert the
6110
      # disk status, even if this step failed.
6111

    
6112
  def _ExecMigration(self):
6113
    """Migrate an instance.
6114

6115
    The migrate is done by:
6116
      - change the disks into dual-master mode
6117
      - wait until disks are fully synchronized again
6118
      - migrate the instance
6119
      - change disks on the new secondary node (the old primary) to secondary
6120
      - wait until disks are fully synchronized
6121
      - change disks into single-master mode
6122

6123
    """
6124
    instance = self.instance
6125
    target_node = self.target_node
6126
    source_node = self.source_node
6127

    
6128
    self.feedback_fn("* checking disk consistency between source and target")
6129
    for dev in instance.disks:
6130
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6131
        raise errors.OpExecError("Disk %s is degraded or not fully"
6132
                                 " synchronized on target node,"
6133
                                 " aborting migrate." % dev.iv_name)
6134

    
6135
    # First get the migration information from the remote node
6136
    result = self.rpc.call_migration_info(source_node, instance)
6137
    msg = result.fail_msg
6138
    if msg:
6139
      log_err = ("Failed fetching source migration information from %s: %s" %
6140
                 (source_node, msg))
6141
      logging.error(log_err)
6142
      raise errors.OpExecError(log_err)
6143

    
6144
    self.migration_info = migration_info = result.payload
6145

    
6146
    # Then switch the disks to master/master mode
6147
    self._EnsureSecondary(target_node)
6148
    self._GoStandalone()
6149
    self._GoReconnect(True)
6150
    self._WaitUntilSync()
6151

    
6152
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6153
    result = self.rpc.call_accept_instance(target_node,
6154
                                           instance,
6155
                                           migration_info,
6156
                                           self.nodes_ip[target_node])
6157

    
6158
    msg = result.fail_msg
6159
    if msg:
6160
      logging.error("Instance pre-migration failed, trying to revert"
6161
                    " disk status: %s", msg)
6162
      self.feedback_fn("Pre-migration failed, aborting")
6163
      self._AbortMigration()
6164
      self._RevertDiskStatus()
6165
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6166
                               (instance.name, msg))
6167

    
6168
    self.feedback_fn("* migrating instance to %s" % target_node)
6169
    time.sleep(10)
6170
    result = self.rpc.call_instance_migrate(source_node, instance,
6171
                                            self.nodes_ip[target_node],
6172
                                            self.live)
6173
    msg = result.fail_msg
6174
    if msg:
6175
      logging.error("Instance migration failed, trying to revert"
6176
                    " disk status: %s", msg)
6177
      self.feedback_fn("Migration failed, aborting")
6178
      self._AbortMigration()
6179
      self._RevertDiskStatus()
6180
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6181
                               (instance.name, msg))
6182
    time.sleep(10)
6183

    
6184
    instance.primary_node = target_node
6185
    # distribute new instance config to the other nodes
6186
    self.cfg.Update(instance, self.feedback_fn)
6187

    
6188
    result = self.rpc.call_finalize_migration(target_node,
6189
                                              instance,
6190
                                              migration_info,
6191
                                              True)
6192
    msg = result.fail_msg
6193
    if msg:
6194
      logging.error("Instance migration succeeded, but finalization failed:"
6195
                    " %s", msg)
6196
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6197
                               msg)
6198

    
6199
    self._EnsureSecondary(source_node)
6200
    self._WaitUntilSync()
6201
    self._GoStandalone()
6202
    self._GoReconnect(False)
6203
    self._WaitUntilSync()
6204

    
6205
    self.feedback_fn("* done")
6206

    
6207
  def Exec(self, feedback_fn):
6208
    """Perform the migration.
6209

6210
    """
6211
    feedback_fn("Migrating instance %s" % self.instance.name)
6212

    
6213
    self.feedback_fn = feedback_fn
6214

    
6215
    self.source_node = self.instance.primary_node
6216
    self.target_node = self.instance.secondary_nodes[0]
6217
    self.all_nodes = [self.source_node, self.target_node]
6218
    self.nodes_ip = {
6219
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6220
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6221
      }
6222

    
6223
    if self.cleanup:
6224
      return self._ExecCleanup()
6225
    else:
6226
      return self._ExecMigration()


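# Illustrative sketch (hypothetical helper, not called by TLMigrateInstance):
# the resolution of the obsolete boolean "live" parameter into a migration
# "mode", as performed in TLMigrateInstance.CheckPrereq above.  The hypervisor
# default mode is passed in explicitly here to keep the sketch self-contained.
def _ExampleResolveMigrationMode(live, mode, hv_default_mode):
  """Return (mode, live_flag) after merging the two opcode parameters.

  @raise errors.OpPrereqError: if both "live" and "mode" were given

  """
  if live is not None and mode is not None:
    raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                               " parameters are accepted", errors.ECODE_INVAL)
  if live is not None:
    if live:
      mode = constants.HT_MIGRATION_LIVE
    else:
      mode = constants.HT_MIGRATION_NONLIVE
  elif mode is None:
    # fall back to the hypervisor's configured default
    mode = hv_default_mode
  return mode, mode == constants.HT_MIGRATION_LIVE

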
def _CreateBlockDev(lu, node, instance, device, force_create,
6230
                    info, force_open):
6231
  """Create a tree of block devices on a given node.
6232

6233
  If this device type has to be created on secondaries, create it and
6234
  all its children.
6235

6236
  If not, just recurse to children keeping the same 'force' value.
6237

6238
  @param lu: the lu on whose behalf we execute
6239
  @param node: the node on which to create the device
6240
  @type instance: L{objects.Instance}
6241
  @param instance: the instance which owns the device
6242
  @type device: L{objects.Disk}
6243
  @param device: the device to create
6244
  @type force_create: boolean
6245
  @param force_create: whether to force creation of this device; this
6246
      will be changed to True whenever we find a device which has
6247
      CreateOnSecondary() attribute
6248
  @param info: the extra 'metadata' we should attach to the device
6249
      (this will be represented as an LVM tag)
6250
  @type force_open: boolean
6251
  @param force_open: this parameter will be passed to the
6252
      L{backend.BlockdevCreate} function where it specifies
6253
      whether we run on primary or not, and it affects both
6254
      the child assembly and the device's own Open() execution
6255

6256
  """
6257
  if device.CreateOnSecondary():
6258
    force_create = True
6259

    
6260
  if device.children:
6261
    for child in device.children:
6262
      _CreateBlockDev(lu, node, instance, child, force_create,
6263
                      info, force_open)
6264

    
6265
  if not force_create:
6266
    return
6267

    
6268
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


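# Illustrative sketch (plain dictionaries instead of L{objects.Disk}, not used
# by _CreateBlockDev): how the "force_create" flag propagates down a disk
# tree.  Once a device that must exist on secondaries is reached, it and its
# whole subtree get created; otherwise the recursion only descends.
def _ExampleCollectDevicesToCreate(device, force_create, result):
  """Append to result the device names _CreateBlockDev would create.

  @param device: dict with "name", "create_on_secondary" and "children" keys
      (a simplified stand-in for L{objects.Disk})

  """
  if device["create_on_secondary"]:
    force_create = True
  # children are handled before the device itself, matching creation order
  for child in device.get("children", []):
    _ExampleCollectDevicesToCreate(child, force_create, result)
  if not force_create:
    return
  result.append(device["name"])
# For a DRBD8 device with two LV children this lists both LVs first and then
# the DRBD device itself, i.e. the order in which they have to exist.

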
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6272
  """Create a single block device on a given node.
6273

6274
  This will not recurse over children of the device, so they must be
6275
  created in advance.
6276

6277
  @param lu: the lu on whose behalf we execute
6278
  @param node: the node on which to create the device
6279
  @type instance: L{objects.Instance}
6280
  @param instance: the instance which owns the device
6281
  @type device: L{objects.Disk}
6282
  @param device: the device to create
6283
  @param info: the extra 'metadata' we should attach to the device
6284
      (this will be represented as a LVM tag)
6285
  @type force_open: boolean
6286
  @param force_open: this parameter will be passes to the
6287
      L{backend.BlockdevCreate} function where it specifies
6288
      whether we run on primary or not, and it affects both
6289
      the child assembly and the device own Open() execution
6290

6291
  """
6292
  lu.cfg.SetDiskID(device, node)
6293
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6294
                                       instance.name, force_open, info)
6295
  result.Raise("Can't create block device %s on"
6296
               " node %s for instance %s" % (device, node, instance.name))
6297
  if device.physical_id is None:
6298
    device.physical_id = result.payload
6299

    
6300

    
6301
def _GenerateUniqueNames(lu, exts):
6302
  """Generate a suitable LV name.
6303

6304
  This will generate a logical volume name for the given instance.
6305

6306
  """
6307
  results = []
6308
  for val in exts:
6309
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6310
    results.append("%s%s" % (new_id, val))
6311
  return results
6312

    
6313

    
6314
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6315
                         p_minor, s_minor):
6316
  """Generate a drbd8 device complete with its children.
6317

6318
  """
6319
  port = lu.cfg.AllocatePort()
6320
  vgname = lu.cfg.GetVGName()
6321
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6322
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6323
                          logical_id=(vgname, names[0]))
6324
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6325
                          logical_id=(vgname, names[1]))
6326
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6327
                          logical_id=(primary, secondary, port,
6328
                                      p_minor, s_minor,
6329
                                      shared_secret),
6330
                          children=[dev_data, dev_meta],
6331
                          iv_name=iv_name)
6332
  return drbd_dev


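# Illustrative sketch (plain nested dictionaries, not L{objects.Disk}): the
# shape of the device tree returned by _GenerateDRBD8Branch above -- a DRBD8
# device of the requested size backed by a data LV of the same size and a
# fixed 128 MB metadata LV.
def _ExampleDrbd8TreeShape(size, data_lv, meta_lv):
  """Return a simplified description of a DRBD8 disk branch.

  """
  return {
    "dev_type": "drbd8",
    "size": size,
    "children": [
      {"dev_type": "lv", "size": size, "name": data_lv},
      {"dev_type": "lv", "size": 128, "name": meta_lv},
      ],
    }

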
def _GenerateDiskTemplate(lu, template_name,
6336
                          instance_name, primary_node,
6337
                          secondary_nodes, disk_info,
6338
                          file_storage_dir, file_driver,
6339
                          base_index):
6340
  """Generate the entire disk layout for a given template type.
6341

6342
  """
6343
  #TODO: compute space requirements
6344

    
6345
  vgname = lu.cfg.GetVGName()
6346
  disk_count = len(disk_info)
6347
  disks = []
6348
  if template_name == constants.DT_DISKLESS:
6349
    pass
6350
  elif template_name == constants.DT_PLAIN:
6351
    if len(secondary_nodes) != 0:
6352
      raise errors.ProgrammerError("Wrong template configuration")
6353

    
6354
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6355
                                      for i in range(disk_count)])
6356
    for idx, disk in enumerate(disk_info):
6357
      disk_index = idx + base_index
6358
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6359
                              logical_id=(vgname, names[idx]),
6360
                              iv_name="disk/%d" % disk_index,
6361
                              mode=disk["mode"])
6362
      disks.append(disk_dev)
6363
  elif template_name == constants.DT_DRBD8:
6364
    if len(secondary_nodes) != 1:
6365
      raise errors.ProgrammerError("Wrong template configuration")
6366
    remote_node = secondary_nodes[0]
6367
    minors = lu.cfg.AllocateDRBDMinor(
6368
      [primary_node, remote_node] * len(disk_info), instance_name)
6369

    
6370
    names = []
6371
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6372
                                               for i in range(disk_count)]):
6373
      names.append(lv_prefix + "_data")
6374
      names.append(lv_prefix + "_meta")
6375
    for idx, disk in enumerate(disk_info):
6376
      disk_index = idx + base_index
6377
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6378
                                      disk["size"], names[idx*2:idx*2+2],
6379
                                      "disk/%d" % disk_index,
6380
                                      minors[idx*2], minors[idx*2+1])
6381
      disk_dev.mode = disk["mode"]
6382
      disks.append(disk_dev)
6383
  elif template_name == constants.DT_FILE:
6384
    if len(secondary_nodes) != 0:
6385
      raise errors.ProgrammerError("Wrong template configuration")
6386

    
6387
    _RequireFileStorage()
6388

    
6389
    for idx, disk in enumerate(disk_info):
6390
      disk_index = idx + base_index
6391
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6392
                              iv_name="disk/%d" % disk_index,
6393
                              logical_id=(file_driver,
6394
                                          "%s/disk%d" % (file_storage_dir,
6395
                                                         disk_index)),
6396
                              mode=disk["mode"])
6397
      disks.append(disk_dev)
6398
  else:
6399
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6400
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


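# Worked example for _CalcEta (sketch, not called anywhere): after writing
# 512 MB of a 2048 MB disk in 30 seconds, the average speed is 30/512 s/MB,
# so the remaining 1536 MB should take about 90 seconds.
def _ExampleCalcEtaUsage():
  """Show the ETA computed for a partially written disk.

  """
  eta = _CalcEta(30.0, 512, 2048)
  assert round(eta, 6) == 90.0
  return eta

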
def _WipeDisks(lu, instance):
6424
  """Wipes instance disks.
6425

6426
  @type lu: L{LogicalUnit}
6427
  @param lu: the logical unit on whose behalf we execute
6428
  @type instance: L{objects.Instance}
6429
  @param instance: the instance whose disks we should wipe
6430
  @return: the success of the wipe
6431

6432
  """
6433
  node = instance.primary_node
6434
  for idx, device in enumerate(instance.disks):
6435
    lu.LogInfo("* Wiping disk %d", idx)
6436
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6437

    
6438
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6439
    # MAX_WIPE_CHUNK at max
6440
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6441
                          constants.MIN_WIPE_CHUNK_PERCENT)
6442

    
6443
    offset = 0
6444
    size = device.size
6445
    last_output = 0
6446
    start_time = time.time()
6447

    
6448
    while offset < size:
6449
      wipe_size = min(wipe_chunk_size, size - offset)
6450
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6451
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6452
                   (idx, offset, wipe_size))
6453
      now = time.time()
6454
      offset += wipe_size
6455
      if now - last_output >= 60:
6456
        eta = _CalcEta(now - start_time, offset, size)
6457
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6458
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6459
        last_output = now


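# Illustrative sketch (hypothetical helper, not used by _WipeDisks): the chunk
# size used when wiping a disk is a fixed percentage of the disk, capped at a
# maximum chunk size, and the last chunk is shortened so the offsets never run
# past the end of the device.  The percentage and cap are parameters here to
# keep the sketch self-contained.
def _ExampleWipeChunks(disk_size, chunk_percent, max_chunk):
  """Return the (offset, length) pairs a wipe of disk_size would use.

  """
  chunk_size = min(max_chunk, disk_size / 100.0 * chunk_percent)
  chunks = []
  offset = 0
  while offset < disk_size:
    length = min(chunk_size, disk_size - offset)
    chunks.append((offset, length))
    offset += length
  return chunks

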
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6463
  """Create all disks for an instance.
6464

6465
  This abstracts away some work from AddInstance.
6466

6467
  @type lu: L{LogicalUnit}
6468
  @param lu: the logical unit on whose behalf we execute
6469
  @type instance: L{objects.Instance}
6470
  @param instance: the instance whose disks we should create
6471
  @type to_skip: list
6472
  @param to_skip: list of indices to skip
6473
  @type target_node: string
6474
  @param target_node: if passed, overrides the target node for creation
6475
  @rtype: boolean
6476
  @return: the success of the creation
6477

6478
  """
6479
  info = _GetInstanceInfoText(instance)
6480
  if target_node is None:
6481
    pnode = instance.primary_node
6482
    all_nodes = instance.all_nodes
6483
  else:
6484
    pnode = target_node
6485
    all_nodes = [pnode]
6486

    
6487
  if instance.disk_template == constants.DT_FILE:
6488
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6489
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6490

    
6491
    result.Raise("Failed to create directory '%s' on"
6492
                 " node %s" % (file_storage_dir, pnode))
6493

    
6494
  # Note: this needs to be kept in sync with adding of disks in
6495
  # LUSetInstanceParams
6496
  for idx, device in enumerate(instance.disks):
6497
    if to_skip and idx in to_skip:
6498
      continue
6499
    logging.info("Creating volume %s for instance %s",
6500
                 device.iv_name, instance.name)
6501
    #HARDCODE
6502
    for node in all_nodes:
6503
      f_create = node == pnode
6504
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6505

    
6506

    
6507
def _RemoveDisks(lu, instance, target_node=None):
6508
  """Remove all disks for an instance.
6509

6510
  This abstracts away some work from `AddInstance()` and
6511
  `RemoveInstance()`. Note that in case some of the devices couldn't
6512
  be removed, the removal will continue with the other ones (compare
6513
  with `_CreateDisks()`).
6514

6515
  @type lu: L{LogicalUnit}
6516
  @param lu: the logical unit on whose behalf we execute
6517
  @type instance: L{objects.Instance}
6518
  @param instance: the instance whose disks we should remove
6519
  @type target_node: string
6520
  @param target_node: used to override the node on which to remove the disks
6521
  @rtype: boolean
6522
  @return: the success of the removal
6523

6524
  """
6525
  logging.info("Removing block devices for instance %s", instance.name)
6526

    
6527
  all_result = True
6528
  for device in instance.disks:
6529
    if target_node:
6530
      edata = [(target_node, device)]
6531
    else:
6532
      edata = device.ComputeNodeTree(instance.primary_node)
6533
    for node, disk in edata:
6534
      lu.cfg.SetDiskID(disk, node)
6535
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6536
      if msg:
6537
        lu.LogWarning("Could not remove block device %s on node %s,"
6538
                      " continuing anyway: %s", device.iv_name, node, msg)
6539
        all_result = False
6540

    
6541
  if instance.disk_template == constants.DT_FILE:
6542
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6543
    if target_node:
6544
      tgt = target_node
6545
    else:
6546
      tgt = instance.primary_node
6547
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6548
    if result.fail_msg:
6549
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6550
                    file_storage_dir, instance.primary_node, result.fail_msg)
6551
      all_result = False
6552

    
6553
  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


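# Worked example for _ComputeDiskSize (sketch, not called anywhere): two
# 1024 MB disks need 2048 MB in the volume group for the plain template, and
# 2048 + 2 * 128 = 2304 MB for drbd8 because of the per-disk metadata volume.
def _ExampleComputeDiskSizeUsage():
  """Show the space requirements for a small two-disk instance.

  """
  disks = [{"size": 1024}, {"size": 1024}]
  assert _ComputeDiskSize(constants.DT_PLAIN, disks) == 2048
  assert _ComputeDiskSize(constants.DT_DRBD8, disks) == 2304
  return disks

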
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6577
  """Hypervisor parameter validation.
6578

6579
  This function abstracts the hypervisor parameter validation to be
6580
  used in both instance create and instance modify.
6581

6582
  @type lu: L{LogicalUnit}
6583
  @param lu: the logical unit for which we check
6584
  @type nodenames: list
6585
  @param nodenames: the list of nodes on which we should check
6586
  @type hvname: string
6587
  @param hvname: the name of the hypervisor we should use
6588
  @type hvparams: dict
6589
  @param hvparams: the parameters which we need to check
6590
  @raise errors.OpPrereqError: if the parameters are not valid
6591

6592
  """
6593
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6594
                                                  hvname,
6595
                                                  hvparams)
6596
  for node in nodenames:
6597
    info = hvinfo[node]
6598
    if info.offline:
6599
      continue
6600
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
6604
  """OS parameters validation.
6605

6606
  @type lu: L{LogicalUnit}
6607
  @param lu: the logical unit for which we check
6608
  @type required: boolean
6609
  @param required: whether the validation should fail if the OS is not
6610
      found
6611
  @type nodenames: list
6612
  @param nodenames: the list of nodes on which we should check
6613
  @type osname: string
6614
  @param osname: the name of the hypervisor we should use
6615
  @type osparams: dict
6616
  @param osparams: the parameters which we need to check
6617
  @raise errors.OpPrereqError: if the parameters are not valid
6618

6619
  """
6620
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6621
                                   [constants.OS_VALIDATE_PARAMETERS],
6622
                                   osparams)
6623
  for node, nres in result.items():
6624
    # we don't check for offline cases since this should be run only
6625
    # against the master node and/or an instance's nodes
6626
    nres.Raise("OS Parameters validation failed on node %s" % node)
6627
    if not nres.payload:
6628
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6629
                 osname, node)


class LUCreateInstance(LogicalUnit):
6633
  """Create an instance.
6634

6635
  """
6636
  HPATH = "instance-add"
6637
  HTYPE = constants.HTYPE_INSTANCE
6638
  _OP_PARAMS = [
6639
    _PInstanceName,
6640
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6641
    ("start", True, ht.TBool),
6642
    ("wait_for_sync", True, ht.TBool),
6643
    ("ip_check", True, ht.TBool),
6644
    ("name_check", True, ht.TBool),
6645
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6646
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6647
    ("hvparams", ht.EmptyDict, ht.TDict),
6648
    ("beparams", ht.EmptyDict, ht.TDict),
6649
    ("osparams", ht.EmptyDict, ht.TDict),
6650
    ("no_install", None, ht.TMaybeBool),
6651
    ("os_type", None, ht.TMaybeString),
6652
    ("force_variant", False, ht.TBool),
6653
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6654
    ("source_x509_ca", None, ht.TMaybeString),
6655
    ("source_instance_name", None, ht.TMaybeString),
6656
    ("src_node", None, ht.TMaybeString),
6657
    ("src_path", None, ht.TMaybeString),
6658
    ("pnode", None, ht.TMaybeString),
6659
    ("snode", None, ht.TMaybeString),
6660
    ("iallocator", None, ht.TMaybeString),
6661
    ("hypervisor", None, ht.TMaybeString),
6662
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6663
    ("identify_defaults", False, ht.TBool),
6664
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6665
    ("file_storage_dir", None, ht.TMaybeString),
6666
    ]
6667
  REQ_BGL = False
6668

    
6669
  def CheckArguments(self):
6670
    """Check arguments.
6671

6672
    """
6673
    # do not require name_check to ease forward/backward compatibility
6674
    # for tools
6675
    if self.op.no_install and self.op.start:
6676
      self.LogInfo("No-installation mode selected, disabling startup")
6677
      self.op.start = False
6678
    # validate/normalize the instance name
6679
    self.op.instance_name = \
6680
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6681

    
6682
    if self.op.ip_check and not self.op.name_check:
6683
      # TODO: make the ip check more flexible and not depend on the name check
6684
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6685
                                 errors.ECODE_INVAL)
6686

    
6687
    # check nics' parameter names
6688
    for nic in self.op.nics:
6689
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6690

    
6691
    # check disks. parameter names and consistent adopt/no-adopt strategy
6692
    has_adopt = has_no_adopt = False
6693
    for disk in self.op.disks:
6694
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6695
      if "adopt" in disk:
6696
        has_adopt = True
6697
      else:
6698
        has_no_adopt = True
6699
    if has_adopt and has_no_adopt:
6700
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6701
                                 errors.ECODE_INVAL)
6702
    if has_adopt:
6703
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6704
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6705
                                   " '%s' disk template" %
6706
                                   self.op.disk_template,
6707
                                   errors.ECODE_INVAL)
6708
      if self.op.iallocator is not None:
6709
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6710
                                   " iallocator script", errors.ECODE_INVAL)
6711
      if self.op.mode == constants.INSTANCE_IMPORT:
6712
        raise errors.OpPrereqError("Disk adoption not allowed for"
6713
                                   " instance import", errors.ECODE_INVAL)
6714

    
6715
    self.adopt_disks = has_adopt
6716

    
6717
    # instance name verification
6718
    if self.op.name_check:
6719
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6720
      self.op.instance_name = self.hostname1.name
6721
      # used in CheckPrereq for ip ping check
6722
      self.check_ip = self.hostname1.ip
6723
    else:
6724
      self.check_ip = None
6725

    
6726
    # file storage checks
6727
    if (self.op.file_driver and
6728
        not self.op.file_driver in constants.FILE_DRIVER):
6729
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6730
                                 self.op.file_driver, errors.ECODE_INVAL)
6731

    
6732
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6733
      raise errors.OpPrereqError("File storage directory path not absolute",
6734
                                 errors.ECODE_INVAL)
6735

    
6736
    ### Node/iallocator related checks
6737
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6738

    
6739
    if self.op.pnode is not None:
6740
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6741
        if self.op.snode is None:
6742
          raise errors.OpPrereqError("The networked disk templates need"
6743
                                     " a mirror node", errors.ECODE_INVAL)
6744
      elif self.op.snode:
6745
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6746
                        " template")
6747
        self.op.snode = None
6748

    
6749
    self._cds = _GetClusterDomainSecret()
6750

    
6751
    if self.op.mode == constants.INSTANCE_IMPORT:
6752
      # On import force_variant must be True, because if we forced it at
6753
      # initial install, our only chance when importing it back is that it
6754
      # works again!
6755
      self.op.force_variant = True
6756

    
6757
      if self.op.no_install:
6758
        self.LogInfo("No-installation mode has no effect during import")
6759

    
6760
    elif self.op.mode == constants.INSTANCE_CREATE:
6761
      if self.op.os_type is None:
6762
        raise errors.OpPrereqError("No guest OS specified",
6763
                                   errors.ECODE_INVAL)
6764
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6765
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6766
                                   " installation" % self.op.os_type,
6767
                                   errors.ECODE_STATE)
6768
      if self.op.disk_template is None:
6769
        raise errors.OpPrereqError("No disk template specified",
6770
                                   errors.ECODE_INVAL)
6771

    
6772
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6773
      # Check handshake to ensure both clusters have the same domain secret
6774
      src_handshake = self.op.source_handshake
6775
      if not src_handshake:
6776
        raise errors.OpPrereqError("Missing source handshake",
6777
                                   errors.ECODE_INVAL)
6778

    
6779
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6780
                                                           src_handshake)
6781
      if errmsg:
6782
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6783
                                   errors.ECODE_INVAL)
6784

    
6785
      # Load and check source CA
6786
      self.source_x509_ca_pem = self.op.source_x509_ca
6787
      if not self.source_x509_ca_pem:
6788
        raise errors.OpPrereqError("Missing source X509 CA",
6789
                                   errors.ECODE_INVAL)
6790

    
6791
      try:
6792
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6793
                                                    self._cds)
6794
      except OpenSSL.crypto.Error, err:
6795
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6796
                                   (err, ), errors.ECODE_INVAL)
6797

    
6798
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6799
      if errcode is not None:
6800
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6801
                                   errors.ECODE_INVAL)
6802

    
6803
      self.source_x509_ca = cert
6804

    
6805
      src_instance_name = self.op.source_instance_name
6806
      if not src_instance_name:
6807
        raise errors.OpPrereqError("Missing source instance name",
6808
                                   errors.ECODE_INVAL)
6809

    
6810
      self.source_instance_name = \
6811
          netutils.GetHostname(name=src_instance_name).name
6812

    
6813
    else:
6814
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6815
                                 self.op.mode, errors.ECODE_INVAL)
6816

    
6817
  def ExpandNames(self):
6818
    """ExpandNames for CreateInstance.
6819

6820
    Figure out the right locks for instance creation.
6821

6822
    """
6823
    self.needed_locks = {}
6824

    
6825
    instance_name = self.op.instance_name
6826
    # this is just a preventive check, but someone might still add this
6827
    # instance in the meantime, and creation will fail at lock-add time
6828
    if instance_name in self.cfg.GetInstanceList():
6829
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6830
                                 instance_name, errors.ECODE_EXISTS)
6831

    
6832
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6833

    
6834
    if self.op.iallocator:
6835
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6836
    else:
6837
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6838
      nodelist = [self.op.pnode]
6839
      if self.op.snode is not None:
6840
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6841
        nodelist.append(self.op.snode)
6842
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6843

    
6844
    # in case of import lock the source node too
6845
    if self.op.mode == constants.INSTANCE_IMPORT:
6846
      src_node = self.op.src_node
6847
      src_path = self.op.src_path
6848

    
6849
      if src_path is None:
6850
        self.op.src_path = src_path = self.op.instance_name
6851

    
6852
      if src_node is None:
6853
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6854
        self.op.src_node = None
6855
        if os.path.isabs(src_path):
6856
          raise errors.OpPrereqError("Importing an instance from an absolute"
6857
                                     " path requires a source node option.",
6858
                                     errors.ECODE_INVAL)
6859
      else:
6860
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6861
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6862
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6863
        if not os.path.isabs(src_path):
6864
          self.op.src_path = src_path = \
6865
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6866

    
6867
  def _RunAllocator(self):
6868
    """Run the allocator based on input opcode.
6869

6870
    """
6871
    nics = [n.ToDict() for n in self.nics]
6872
    ial = IAllocator(self.cfg, self.rpc,
6873
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6874
                     name=self.op.instance_name,
6875
                     disk_template=self.op.disk_template,
6876
                     tags=[],
6877
                     os=self.op.os_type,
6878
                     vcpus=self.be_full[constants.BE_VCPUS],
6879
                     mem_size=self.be_full[constants.BE_MEMORY],
6880
                     disks=self.disks,
6881
                     nics=nics,
6882
                     hypervisor=self.op.hypervisor,
6883
                     )
6884

    
6885
    ial.Run(self.op.iallocator)
6886

    
6887
    if not ial.success:
6888
      raise errors.OpPrereqError("Can't compute nodes using"
6889
                                 " iallocator '%s': %s" %
6890
                                 (self.op.iallocator, ial.info),
6891
                                 errors.ECODE_NORES)
6892
    if len(ial.result) != ial.required_nodes:
6893
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6894
                                 " of nodes (%s), required %s" %
6895
                                 (self.op.iallocator, len(ial.result),
6896
                                  ial.required_nodes), errors.ECODE_FAULT)
6897
    self.op.pnode = ial.result[0]
6898
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6899
                 self.op.instance_name, self.op.iallocator,
6900
                 utils.CommaJoin(ial.result))
6901
    if ial.required_nodes == 2:
6902
      self.op.snode = ial.result[1]
6903

    
6904
  def BuildHooksEnv(self):
6905
    """Build hooks env.
6906

6907
    This runs on master, primary and secondary nodes of the instance.
6908

6909
    """
6910
    env = {
6911
      "ADD_MODE": self.op.mode,
6912
      }
6913
    if self.op.mode == constants.INSTANCE_IMPORT:
6914
      env["SRC_NODE"] = self.op.src_node
6915
      env["SRC_PATH"] = self.op.src_path
6916
      env["SRC_IMAGES"] = self.src_images
6917

    
6918
    env.update(_BuildInstanceHookEnv(
6919
      name=self.op.instance_name,
6920
      primary_node=self.op.pnode,
6921
      secondary_nodes=self.secondaries,
6922
      status=self.op.start,
6923
      os_type=self.op.os_type,
6924
      memory=self.be_full[constants.BE_MEMORY],
6925
      vcpus=self.be_full[constants.BE_VCPUS],
6926
      nics=_NICListToTuple(self, self.nics),
6927
      disk_template=self.op.disk_template,
6928
      disks=[(d["size"], d["mode"]) for d in self.disks],
6929
      bep=self.be_full,
6930
      hvp=self.hv_full,
6931
      hypervisor_name=self.op.hypervisor,
6932
    ))
6933

    
6934
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6935
          self.secondaries)
6936
    return env, nl, nl
6937

    
6938
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
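    # at this point only values differing from the cluster defaults are left
    # in self.op.hvparams/beparams/osparams and in the per-NIC parameter dicts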
7081

    
7082
  def CheckPrereq(self):
7083
    """Check prerequisites.
7084

7085
    """
7086
    if self.op.mode == constants.INSTANCE_IMPORT:
7087
      export_info = self._ReadExportInfo()
7088
      self._ReadExportParams(export_info)
7089

    
7090
    _CheckDiskTemplate(self.op.disk_template)
7091

    
7092
    if (not self.cfg.GetVGName() and
7093
        self.op.disk_template not in constants.DTS_NOT_LVM):
7094
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7095
                                 " instances", errors.ECODE_STATE)
7096

    
7097
    if self.op.hypervisor is None:
7098
      self.op.hypervisor = self.cfg.GetHypervisorType()
7099

    
7100
    cluster = self.cfg.GetClusterInfo()
7101
    enabled_hvs = cluster.enabled_hypervisors
7102
    if self.op.hypervisor not in enabled_hvs:
7103
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7104
                                 " cluster (%s)" % (self.op.hypervisor,
7105
                                  ",".join(enabled_hvs)),
7106
                                 errors.ECODE_STATE)
7107

    
7108
    # check hypervisor parameter syntax (locally)
7109
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7110
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7111
                                      self.op.hvparams)
7112
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7113
    hv_type.CheckParameterSyntax(filled_hvp)
7114
    self.hv_full = filled_hvp
7115
    # check that we don't specify global parameters on an instance
7116
    _CheckGlobalHvParams(self.op.hvparams)
7117

    
7118
    # fill and remember the beparams dict
7119
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7120
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7121

    
7122
    # build os parameters
7123
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7124

    
7125
    # now that hvp/bep are in final format, let's reset to defaults,
7126
    # if told to do so
7127
    if self.op.identify_defaults:
7128
      self._RevertToDefaults(cluster)
7129

    
7130
    # NIC buildup
7131
    self.nics = []
7132
    for idx, nic in enumerate(self.op.nics):
7133
      nic_mode_req = nic.get("mode", None)
7134
      nic_mode = nic_mode_req
7135
      if nic_mode is None:
7136
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7137

    
7138
      # in routed mode, for the first nic, the default ip is 'auto'
7139
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7140
        default_ip_mode = constants.VALUE_AUTO
7141
      else:
7142
        default_ip_mode = constants.VALUE_NONE
7143

    
7144
      # ip validity checks
7145
      ip = nic.get("ip", default_ip_mode)
7146
      if ip is None or ip.lower() == constants.VALUE_NONE:
7147
        nic_ip = None
7148
      elif ip.lower() == constants.VALUE_AUTO:
7149
        if not self.op.name_check:
7150
          raise errors.OpPrereqError("IP address set to auto but name checks"
7151
                                     " have been skipped",
7152
                                     errors.ECODE_INVAL)
7153
        nic_ip = self.hostname1.ip
7154
      else:
7155
        if not netutils.IPAddress.IsValid(ip):
7156
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7157
                                     errors.ECODE_INVAL)
7158
        nic_ip = ip
7159

    
7160
      # TODO: check the ip address for uniqueness
7161
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7162
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7163
                                   errors.ECODE_INVAL)
7164

    
7165
      # MAC address verification
7166
      mac = nic.get("mac", constants.VALUE_AUTO)
7167
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7168
        mac = utils.NormalizeAndValidateMac(mac)
7169

    
7170
        try:
7171
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7172
        except errors.ReservationError:
7173
          raise errors.OpPrereqError("MAC address %s already in use"
7174
                                     " in cluster" % mac,
7175
                                     errors.ECODE_NOTUNIQUE)
7176

    
7177
      # bridge verification
7178
      bridge = nic.get("bridge", None)
7179
      link = nic.get("link", None)
7180
      if bridge and link:
7181
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7182
                                   " at the same time", errors.ECODE_INVAL)
7183
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7184
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7185
                                   errors.ECODE_INVAL)
7186
      elif bridge:
7187
        link = bridge
7188

    
7189
      nicparams = {}
7190
      if nic_mode_req:
7191
        nicparams[constants.NIC_MODE] = nic_mode_req
7192
      if link:
7193
        nicparams[constants.NIC_LINK] = link
7194

    
7195
      check_params = cluster.SimpleFillNIC(nicparams)
7196
      objects.NIC.CheckParameterSyntax(check_params)
7197
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7198

    
7199
    # disk checks/pre-build
7200
    self.disks = []
7201
    for disk in self.op.disks:
7202
      mode = disk.get("mode", constants.DISK_RDWR)
7203
      if mode not in constants.DISK_ACCESS_SET:
7204
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7205
                                   mode, errors.ECODE_INVAL)
7206
      size = disk.get("size", None)
7207
      if size is None:
7208
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7209
      try:
7210
        size = int(size)
7211
      except (TypeError, ValueError):
7212
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7213
                                   errors.ECODE_INVAL)
7214
      new_disk = {"size": size, "mode": mode}
7215
      if "adopt" in disk:
7216
        new_disk["adopt"] = disk["adopt"]
7217
      self.disks.append(new_disk)
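    # each entry in self.disks is now a dict like {"size": <int>, "mode": ...},
    # optionally with an "adopt" key naming an existing LV to take over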
7218

    
7219
    if self.op.mode == constants.INSTANCE_IMPORT:
7220

    
7221
      # Check that the new instance doesn't have less disks than the export
7222
      instance_disks = len(self.disks)
7223
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7224
      if instance_disks < export_disks:
7225
        raise errors.OpPrereqError("Not enough disks to import."
7226
                                   " (instance: %d, export: %d)" %
7227
                                   (instance_disks, export_disks),
7228
                                   errors.ECODE_INVAL)
7229

    
7230
      disk_images = []
7231
      for idx in range(export_disks):
7232
        option = 'disk%d_dump' % idx
7233
        if export_info.has_option(constants.INISECT_INS, option):
7234
          # FIXME: are the old os-es, disk sizes, etc. useful?
7235
          export_name = export_info.get(constants.INISECT_INS, option)
7236
          image = utils.PathJoin(self.op.src_path, export_name)
7237
          disk_images.append(image)
7238
        else:
7239
          disk_images.append(False)
7240

    
7241
      self.src_images = disk_images
7242

    
7243
      old_name = export_info.get(constants.INISECT_INS, 'name')
7244
      try:
7245
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7246
      except (TypeError, ValueError), err:
7247
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7248
                                   " an integer: %s" % str(err),
7249
                                   errors.ECODE_STATE)
7250
      if self.op.instance_name == old_name:
7251
        for idx, nic in enumerate(self.nics):
7252
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7253
            nic_mac_ini = 'nic%d_mac' % idx
7254
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7255

    
7256
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7257

    
7258
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7259
    if self.op.ip_check:
7260
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7261
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7262
                                   (self.check_ip, self.op.instance_name),
7263
                                   errors.ECODE_NOTUNIQUE)
7264

    
7265
    #### mac address generation
7266
    # By generating here the mac address both the allocator and the hooks get
7267
    # the real final mac address rather than the 'auto' or 'generate' value.
7268
    # There is a race condition between the generation and the instance object
7269
    # creation, which means that we know the mac is valid now, but we're not
7270
    # sure it will be when we actually add the instance. If things go bad
7271
    # adding the instance will abort because of a duplicate mac, and the
7272
    # creation job will fail.
7273
    for nic in self.nics:
7274
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7275
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7276

    
7277
    #### allocator run
7278

    
7279
    if self.op.iallocator is not None:
7280
      self._RunAllocator()
7281

    
7282
    #### node related checks
7283

    
7284
    # check primary node
7285
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7286
    assert self.pnode is not None, \
7287
      "Cannot retrieve locked node %s" % self.op.pnode
7288
    if pnode.offline:
7289
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7290
                                 pnode.name, errors.ECODE_STATE)
7291
    if pnode.drained:
7292
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7293
                                 pnode.name, errors.ECODE_STATE)
7294

    
7295
    self.secondaries = []
7296

    
7297
    # mirror node verification
7298
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7299
      if self.op.snode == pnode.name:
7300
        raise errors.OpPrereqError("The secondary node cannot be the"
7301
                                   " primary node.", errors.ECODE_INVAL)
7302
      _CheckNodeOnline(self, self.op.snode)
7303
      _CheckNodeNotDrained(self, self.op.snode)
7304
      self.secondaries.append(self.op.snode)
7305

    
7306
    nodenames = [pnode.name] + self.secondaries
7307

    
7308
    req_size = _ComputeDiskSize(self.op.disk_template,
7309
                                self.disks)
7310

    
7311
    # Check lv size requirements, if not adopting
7312
    if req_size is not None and not self.adopt_disks:
7313
      _CheckNodesFreeDisk(self, nodenames, req_size)
7314

    
7315
    if self.adopt_disks: # instead, we must check the adoption data
7316
      all_lvs = set([i["adopt"] for i in self.disks])
7317
      if len(all_lvs) != len(self.disks):
7318
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7319
                                   errors.ECODE_INVAL)
7320
      for lv_name in all_lvs:
7321
        try:
7322
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7323
        except errors.ReservationError:
7324
          raise errors.OpPrereqError("LV named %s used by another instance" %
7325
                                     lv_name, errors.ECODE_NOTUNIQUE)
7326

    
7327
      node_lvs = self.rpc.call_lv_list([pnode.name],
7328
                                       self.cfg.GetVGName())[pnode.name]
7329
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7330
      node_lvs = node_lvs.payload
7331
      delta = all_lvs.difference(node_lvs.keys())
7332
      if delta:
7333
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7334
                                   utils.CommaJoin(delta),
7335
                                   errors.ECODE_INVAL)
7336
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7337
      if online_lvs:
7338
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7339
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7340
                                   errors.ECODE_STATE)
7341
      # update the size of disk based on what is found
7342
      for dsk in self.disks:
7343
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7344

    
7345
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7346

    
7347
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7348
    # check OS parameters (remotely)
7349
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7350

    
7351
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7352

    
7353
    # memory check on primary node
7354
    if self.op.start:
7355
      _CheckNodeFreeMemory(self, self.pnode.name,
7356
                           "creating instance %s" % self.op.instance_name,
7357
                           self.be_full[constants.BE_MEMORY],
7358
                           self.op.hypervisor)
7359

    
7360
    self.dry_run_result = list(nodenames)
7361

    
7362
  def Exec(self, feedback_fn):
7363
    """Create and add the instance to the cluster.
7364

7365
    """
7366
    instance = self.op.instance_name
7367
    pnode_name = self.pnode.name
7368

    
7369
    ht_kind = self.op.hypervisor
7370
    if ht_kind in constants.HTS_REQ_PORT:
7371
      network_port = self.cfg.AllocatePort()
7372
    else:
7373
      network_port = None
7374

    
7375
    if constants.ENABLE_FILE_STORAGE:
7376
      # this is needed because os.path.join does not accept None arguments
7377
      if self.op.file_storage_dir is None:
7378
        string_file_storage_dir = ""
7379
      else:
7380
        string_file_storage_dir = self.op.file_storage_dir
7381

    
7382
      # build the full file storage dir path
7383
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7384
                                        string_file_storage_dir, instance)
7385
    else:
7386
      file_storage_dir = ""
7387

    
7388
    disks = _GenerateDiskTemplate(self,
7389
                                  self.op.disk_template,
7390
                                  instance, pnode_name,
7391
                                  self.secondaries,
7392
                                  self.disks,
7393
                                  file_storage_dir,
7394
                                  self.op.file_driver,
7395
                                  0)
7396

    
7397
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7398
                            primary_node=pnode_name,
7399
                            nics=self.nics, disks=disks,
7400
                            disk_template=self.op.disk_template,
7401
                            admin_up=False,
7402
                            network_port=network_port,
7403
                            beparams=self.op.beparams,
7404
                            hvparams=self.op.hvparams,
7405
                            hypervisor=self.op.hypervisor,
7406
                            osparams=self.op.osparams,
7407
                            )
7408

    
7409
    if self.adopt_disks:
7410
      # rename LVs to the newly-generated names; we need to construct
7411
      # 'fake' LV disks with the old data, plus the new unique_id
7412
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7413
      rename_to = []
7414
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7415
        rename_to.append(t_dsk.logical_id)
7416
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7417
        self.cfg.SetDiskID(t_dsk, pnode_name)
7418
      result = self.rpc.call_blockdev_rename(pnode_name,
7419
                                             zip(tmp_disks, rename_to))
7420
      result.Raise("Failed to rename adoped LVs")
7421
    else:
7422
      feedback_fn("* creating instance disks...")
7423
      try:
7424
        _CreateDisks(self, iobj)
7425
      except errors.OpExecError:
7426
        self.LogWarning("Device creation failed, reverting...")
7427
        try:
7428
          _RemoveDisks(self, iobj)
7429
        finally:
7430
          self.cfg.ReleaseDRBDMinors(instance)
7431
          raise
7432

    
7433
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7434
        feedback_fn("* wiping instance disks...")
7435
        try:
7436
          _WipeDisks(self, iobj)
7437
        except errors.OpExecError:
7438
          self.LogWarning("Device wiping failed, reverting...")
7439
          try:
7440
            _RemoveDisks(self, iobj)
7441
          finally:
7442
            self.cfg.ReleaseDRBDMinors(instance)
7443
            raise
7444

    
7445
    feedback_fn("adding instance %s to cluster config" % instance)
7446

    
7447
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7448

    
7449
    # Declare that we don't want to remove the instance lock anymore, as we've
7450
    # added the instance to the config
7451
    del self.remove_locks[locking.LEVEL_INSTANCE]
7452
    # Unlock all the nodes
7453
    if self.op.mode == constants.INSTANCE_IMPORT:
7454
      nodes_keep = [self.op.src_node]
7455
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7456
                       if node != self.op.src_node]
7457
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7458
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7459
    else:
7460
      self.context.glm.release(locking.LEVEL_NODE)
7461
      del self.acquired_locks[locking.LEVEL_NODE]
7462

    
7463
    if self.op.wait_for_sync:
7464
      disk_abort = not _WaitForSync(self, iobj)
7465
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7466
      # make sure the disks are not degraded (still sync-ing is ok)
7467
      time.sleep(15)
7468
      feedback_fn("* checking mirrors status")
7469
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7470
    else:
7471
      disk_abort = False
7472

    
7473
    if disk_abort:
7474
      _RemoveDisks(self, iobj)
7475
      self.cfg.RemoveInstance(iobj.name)
7476
      # Make sure the instance lock gets removed
7477
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7478
      raise errors.OpExecError("There are some degraded disks for"
7479
                               " this instance")
7480

    
7481
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7482
      if self.op.mode == constants.INSTANCE_CREATE:
7483
        if not self.op.no_install:
7484
          feedback_fn("* running the instance OS create scripts...")
7485
          # FIXME: pass debug option from opcode to backend
7486
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7487
                                                 self.op.debug_level)
7488
          result.Raise("Could not add os for instance %s"
7489
                       " on node %s" % (instance, pnode_name))
7490

    
7491
      elif self.op.mode == constants.INSTANCE_IMPORT:
7492
        feedback_fn("* running the instance OS import scripts...")
7493

    
7494
        transfers = []
7495

    
7496
        for idx, image in enumerate(self.src_images):
7497
          if not image:
7498
            continue
7499

    
7500
          # FIXME: pass debug option from opcode to backend
7501
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7502
                                             constants.IEIO_FILE, (image, ),
7503
                                             constants.IEIO_SCRIPT,
7504
                                             (iobj.disks[idx], idx),
7505
                                             None)
7506
          transfers.append(dt)
7507

    
7508
        import_result = \
7509
          masterd.instance.TransferInstanceData(self, feedback_fn,
7510
                                                self.op.src_node, pnode_name,
7511
                                                self.pnode.secondary_ip,
7512
                                                iobj, transfers)
7513
        if not compat.all(import_result):
7514
          self.LogWarning("Some disks for instance %s on node %s were not"
7515
                          " imported successfully" % (instance, pnode_name))
7516

    
7517
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7518
        feedback_fn("* preparing remote import...")
7519
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7520
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7521

    
7522
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7523
                                                     self.source_x509_ca,
7524
                                                     self._cds, timeouts)
7525
        if not compat.all(disk_results):
7526
          # TODO: Should the instance still be started, even if some disks
7527
          # failed to import (valid for local imports, too)?
7528
          self.LogWarning("Some disks for instance %s on node %s were not"
7529
                          " imported successfully" % (instance, pnode_name))
7530

    
7531
        # Run rename script on newly imported instance
7532
        assert iobj.name == instance
7533
        feedback_fn("Running rename script for %s" % instance)
7534
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7535
                                                   self.source_instance_name,
7536
                                                   self.op.debug_level)
7537
        if result.fail_msg:
7538
          self.LogWarning("Failed to run rename script for %s on node"
7539
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7540

    
7541
      else:
7542
        # also checked in the prereq part
7543
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7544
                                     % self.op.mode)
7545

    
7546
    if self.op.start:
7547
      iobj.admin_up = True
7548
      self.cfg.Update(iobj, feedback_fn)
7549
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7550
      feedback_fn("* starting instance...")
7551
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7552
      result.Raise("Could not start instance")
7553

    
7554
    return list(iobj.all_nodes)
7555

    
7556

    
7557
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


7617
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

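  # Valid parameter combinations, enforced by TLReplaceDisks.CheckArguments
  # below: with mode REPLACE_DISK_CHG exactly one of "remote_node" or
  # "iallocator" must be given; the other replacement modes accept neither.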
  def CheckArguments(self):
7634
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7635
                                  self.op.iallocator)
7636

    
7637
  def ExpandNames(self):
7638
    self._ExpandAndLockInstance()
7639

    
7640
    if self.op.iallocator is not None:
7641
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7642

    
7643
    elif self.op.remote_node is not None:
7644
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7645
      self.op.remote_node = remote_node
7646

    
7647
      # Warning: do not remove the locking of the new secondary here
7648
      # unless DRBD8.AddChildren is changed to work in parallel;
7649
      # currently it doesn't since parallel invocations of
7650
      # FindUnusedMinor will conflict
7651
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7652
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7653

    
7654
    else:
7655
      self.needed_locks[locking.LEVEL_NODE] = []
7656
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7657

    
7658
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7659
                                   self.op.iallocator, self.op.remote_node,
7660
                                   self.op.disks, False, self.op.early_release)
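    # the False argument above is delay_iallocator: the allocator-based checks
    # run during CheckPrereq instead of being postponed to Exec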
7661

    
7662
    self.tasklets = [self.replacer]
7663

    
7664
  def DeclareLocks(self, level):
7665
    # If we're not already locking all nodes in the set we have to declare the
7666
    # instance's primary/secondary nodes.
7667
    if (level == locking.LEVEL_NODE and
7668
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7669
      self._LockInstancesNodes()
7670

    
7671
  def BuildHooksEnv(self):
7672
    """Build hooks env.
7673

7674
    This runs on the master, the primary and all the secondaries.
7675

7676
    """
7677
    instance = self.replacer.instance
7678
    env = {
7679
      "MODE": self.op.mode,
7680
      "NEW_SECONDARY": self.op.remote_node,
7681
      "OLD_SECONDARY": instance.secondary_nodes[0],
7682
      }
7683
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7684
    nl = [
7685
      self.cfg.GetMasterNode(),
7686
      instance.primary_node,
7687
      ]
7688
    if self.op.remote_node is not None:
7689
      nl.append(self.op.remote_node)
7690
    return env, nl, nl
7691

    
7692

    
7693
class TLReplaceDisks(Tasklet):
7694
  """Replaces disks for an instance.
7695

7696
  Note: Locking is not within the scope of this class.
7697

7698
  """
7699
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7700
               disks, delay_iallocator, early_release):
7701
    """Initializes this class.
7702

7703
    """
7704
    Tasklet.__init__(self, lu)
7705

    
7706
    # Parameters
7707
    self.instance_name = instance_name
7708
    self.mode = mode
7709
    self.iallocator_name = iallocator_name
7710
    self.remote_node = remote_node
7711
    self.disks = disks
7712
    self.delay_iallocator = delay_iallocator
7713
    self.early_release = early_release
7714

    
7715
    # Runtime data
7716
    self.instance = None
7717
    self.new_node = None
7718
    self.target_node = None
7719
    self.other_node = None
7720
    self.remote_node_info = None
7721
    self.node_secondary_ip = None
7722

    
7723
  @staticmethod
7724
  def CheckArguments(mode, remote_node, iallocator):
7725
    """Helper function for users of this class.
7726

7727
    """
7728
    # check for valid parameter combination
7729
    if mode == constants.REPLACE_DISK_CHG:
7730
      if remote_node is None and iallocator is None:
7731
        raise errors.OpPrereqError("When changing the secondary either an"
7732
                                   " iallocator script must be used or the"
7733
                                   " new node given", errors.ECODE_INVAL)
7734

    
7735
      if remote_node is not None and iallocator is not None:
7736
        raise errors.OpPrereqError("Give either the iallocator or the new"
7737
                                   " secondary, not both", errors.ECODE_INVAL)
7738

    
7739
    elif remote_node is not None or iallocator is not None:
7740
      # Not replacing the secondary
7741
      raise errors.OpPrereqError("The iallocator and new node options can"
7742
                                 " only be used when changing the"
7743
                                 " secondary node", errors.ECODE_INVAL)
7744

    
7745
  @staticmethod
7746
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7747
    """Compute a new secondary node using an IAllocator.
7748

7749
    """
7750
    ial = IAllocator(lu.cfg, lu.rpc,
7751
                     mode=constants.IALLOCATOR_MODE_RELOC,
7752
                     name=instance_name,
7753
                     relocate_from=relocate_from)
7754

    
7755
    ial.Run(iallocator_name)
7756

    
7757
    if not ial.success:
7758
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7759
                                 " %s" % (iallocator_name, ial.info),
7760
                                 errors.ECODE_NORES)
7761

    
7762
    if len(ial.result) != ial.required_nodes:
7763
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7764
                                 " of nodes (%s), required %s" %
7765
                                 (iallocator_name,
7766
                                  len(ial.result), ial.required_nodes),
7767
                                 errors.ECODE_FAULT)
7768

    
7769
    remote_node_name = ial.result[0]
7770

    
7771
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7772
               instance_name, remote_node_name)
7773

    
7774
    return remote_node_name
7775

    
7776
  def _FindFaultyDisks(self, node_name):
7777
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7778
                                    node_name, True)
7779

    
7780
  def CheckPrereq(self):
7781
    """Check prerequisites.
7782

7783
    This checks that the instance is in the cluster.
7784

7785
    """
7786
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7787
    assert instance is not None, \
7788
      "Cannot retrieve locked instance %s" % self.instance_name
7789

    
7790
    if instance.disk_template != constants.DT_DRBD8:
7791
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7792
                                 " instances", errors.ECODE_INVAL)
7793

    
7794
    if len(instance.secondary_nodes) != 1:
7795
      raise errors.OpPrereqError("The instance has a strange layout,"
7796
                                 " expected one secondary but found %d" %
7797
                                 len(instance.secondary_nodes),
7798
                                 errors.ECODE_FAULT)
7799

    
7800
    if not self.delay_iallocator:
7801
      self._CheckPrereq2()
7802

    
7803
  def _CheckPrereq2(self):
7804
    """Check prerequisites, second part.
7805

7806
    This function should always be part of CheckPrereq. It was separated and is
7807
    now called from Exec because during node evacuation iallocator was only
7808
    called with an unmodified cluster model, not taking planned changes into
7809
    account.
7810

7811
    """
7812
    instance = self.instance
7813
    secondary_node = instance.secondary_nodes[0]
7814

    
7815
    if self.iallocator_name is None:
7816
      remote_node = self.remote_node
7817
    else:
7818
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7819
                                       instance.name, instance.secondary_nodes)
7820

    
7821
    if remote_node is not None:
7822
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7823
      assert self.remote_node_info is not None, \
7824
        "Cannot retrieve locked node %s" % remote_node
7825
    else:
7826
      self.remote_node_info = None
7827

    
7828
    if remote_node == self.instance.primary_node:
7829
      raise errors.OpPrereqError("The specified node is the primary node of"
7830
                                 " the instance.", errors.ECODE_INVAL)
7831

    
7832
    if remote_node == secondary_node:
7833
      raise errors.OpPrereqError("The specified node is already the"
7834
                                 " secondary node of the instance.",
7835
                                 errors.ECODE_INVAL)
7836

    
7837
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7838
                                    constants.REPLACE_DISK_CHG):
7839
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7840
                                 errors.ECODE_INVAL)
7841

    
7842
    if self.mode == constants.REPLACE_DISK_AUTO:
7843
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7844
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7845

    
7846
      if faulty_primary and faulty_secondary:
7847
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7848
                                   " one node and can not be repaired"
7849
                                   " automatically" % self.instance_name,
7850
                                   errors.ECODE_STATE)
7851

    
7852
      if faulty_primary:
7853
        self.disks = faulty_primary
7854
        self.target_node = instance.primary_node
7855
        self.other_node = secondary_node
7856
        check_nodes = [self.target_node, self.other_node]
7857
      elif faulty_secondary:
7858
        self.disks = faulty_secondary
7859
        self.target_node = secondary_node
7860
        self.other_node = instance.primary_node
7861
        check_nodes = [self.target_node, self.other_node]
7862
      else:
7863
        self.disks = []
7864
        check_nodes = []
7865

    
7866
    else:
7867
      # Non-automatic modes
7868
      if self.mode == constants.REPLACE_DISK_PRI:
7869
        self.target_node = instance.primary_node
7870
        self.other_node = secondary_node
7871
        check_nodes = [self.target_node, self.other_node]
7872

    
7873
      elif self.mode == constants.REPLACE_DISK_SEC:
7874
        self.target_node = secondary_node
7875
        self.other_node = instance.primary_node
7876
        check_nodes = [self.target_node, self.other_node]
7877

    
7878
      elif self.mode == constants.REPLACE_DISK_CHG:
7879
        self.new_node = remote_node
7880
        self.other_node = instance.primary_node
7881
        self.target_node = secondary_node
7882
        check_nodes = [self.new_node, self.other_node]
7883

    
7884
        _CheckNodeNotDrained(self.lu, remote_node)
7885

    
7886
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7887
        assert old_node_info is not None
7888
        if old_node_info.offline and not self.early_release:
7889
          # doesn't make sense to delay the release
7890
          self.early_release = True
7891
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7892
                          " early-release mode", secondary_node)
7893

    
7894
      else:
7895
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7896
                                     self.mode)
7897

    
7898
      # If not specified all disks should be replaced
7899
      if not self.disks:
7900
        self.disks = range(len(self.instance.disks))
7901

    
7902
    for node in check_nodes:
7903
      _CheckNodeOnline(self.lu, node)
7904

    
7905
    # Check whether disks are valid
7906
    for disk_idx in self.disks:
7907
      instance.FindDisk(disk_idx)
7908

    
7909
    # Get secondary node IP addresses
7910
    node_2nd_ip = {}
7911

    
7912
    for node_name in [self.target_node, self.other_node, self.new_node]:
7913
      if node_name is not None:
7914
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7915

    
7916
    self.node_secondary_ip = node_2nd_ip
7917

    
7918
  def Exec(self, feedback_fn):
7919
    """Execute disk replacement.
7920

7921
    This dispatches the disk replacement to the appropriate handler.
7922

7923
    """
7924
    if self.delay_iallocator:
7925
      self._CheckPrereq2()
7926

    
7927
    if not self.disks:
7928
      feedback_fn("No disks need replacement")
7929
      return
7930

    
7931
    feedback_fn("Replacing disk(s) %s for %s" %
7932
                (utils.CommaJoin(self.disks), self.instance.name))
7933

    
7934
    activate_disks = (not self.instance.admin_up)
7935

    
7936
    # Activate the instance disks if we're replacing them on a down instance
7937
    if activate_disks:
7938
      _StartInstanceDisks(self.lu, self.instance, True)
7939

    
7940
    try:
7941
      # Should we replace the secondary node?
7942
      if self.new_node is not None:
7943
        fn = self._ExecDrbd8Secondary
7944
      else:
7945
        fn = self._ExecDrbd8DiskOnly
7946

    
7947
      return fn(feedback_fn)
7948

    
7949
    finally:
7950
      # Deactivate the instance disks if we're replacing them on a
7951
      # down instance
7952
      if activate_disks:
7953
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7954

    
7955
  def _CheckVolumeGroup(self, nodes):
7956
    self.lu.LogInfo("Checking volume groups")
7957

    
7958
    vgname = self.cfg.GetVGName()
7959

    
7960
    # Make sure volume group exists on all involved nodes
7961
    results = self.rpc.call_vg_list(nodes)
7962
    if not results:
7963
      raise errors.OpExecError("Can't list volume groups on the nodes")
7964

    
7965
    for node in nodes:
7966
      res = results[node]
7967
      res.Raise("Error checking node %s" % node)
7968
      if vgname not in res.payload:
7969
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7970
                                 (vgname, node))
7971

    
7972
  def _CheckDisksExistence(self, nodes):
7973
    # Check disk existence
7974
    for idx, dev in enumerate(self.instance.disks):
7975
      if idx not in self.disks:
7976
        continue
7977

    
7978
      for node in nodes:
7979
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7980
        self.cfg.SetDiskID(dev, node)
7981

    
7982
        result = self.rpc.call_blockdev_find(node, dev)
7983

    
7984
        msg = result.fail_msg
7985
        if msg or not result.payload:
7986
          if not msg:
7987
            msg = "disk not found"
7988
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7989
                                   (idx, node, msg))
7990

    
7991
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7992
    for idx, dev in enumerate(self.instance.disks):
7993
      if idx not in self.disks:
7994
        continue
7995

    
7996
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7997
                      (idx, node_name))
7998

    
7999
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8000
                                   ldisk=ldisk):
8001
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8002
                                 " replace disks for instance %s" %
8003
                                 (node_name, self.instance.name))
8004

    
8005
  def _CreateNewStorage(self, node_name):
8006
    vgname = self.cfg.GetVGName()
8007
    iv_names = {}
8008

    
8009
    for idx, dev in enumerate(self.instance.disks):
8010
      if idx not in self.disks:
8011
        continue
8012

    
8013
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8014

    
8015
      self.cfg.SetDiskID(dev, node_name)
8016

    
8017
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8018
      names = _GenerateUniqueNames(self.lu, lv_names)
8019

    
8020
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8021
                             logical_id=(vgname, names[0]))
8022
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8023
                             logical_id=(vgname, names[1]))
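      # lv_data mirrors the size of the disk being replaced; lv_meta is the
      # fixed-size volume holding the DRBD metadata for it (names[0]/names[1]
      # come from the "data"/"meta" suffixes generated above)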
8024

    
8025
      new_lvs = [lv_data, lv_meta]
8026
      old_lvs = dev.children
8027
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8028

    
8029
      # we pass force_create=True to force the LVM creation
8030
      for new_lv in new_lvs:
8031
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8032
                        _GetInstanceInfoText(self.instance), False)
8033

    
8034
    return iv_names
8035

    
8036
  def _CheckDevices(self, node_name, iv_names):
8037
    for name, (dev, _, _) in iv_names.iteritems():
8038
      self.cfg.SetDiskID(dev, node_name)
8039

    
8040
      result = self.rpc.call_blockdev_find(node_name, dev)
8041

    
8042
      msg = result.fail_msg
8043
      if msg or not result.payload:
8044
        if not msg:
8045
          msg = "disk not found"
8046
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8047
                                 (name, msg))
8048

    
8049
      if result.payload.is_degraded:
8050
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8051

    
8052
  def _RemoveOldStorage(self, node_name, iv_names):
8053
    for name, (_, old_lvs, _) in iv_names.iteritems():
8054
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8055

    
8056
      for lv in old_lvs:
8057
        self.cfg.SetDiskID(lv, node_name)
8058

    
8059
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8060
        if msg:
8061
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8062
                             hint="remove unused LVs manually")
8063

    
8064
  def _ReleaseNodeLock(self, node_name):
8065
    """Releases the lock for a given node."""
8066
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8067

    
8068
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8069
    """Replace a disk on the primary or secondary for DRBD 8.
8070

8071
    The algorithm for replace is quite complicated:
8072

8073
      1. for each disk to be replaced:
8074

8075
        1. create new LVs on the target node with unique names
8076
        1. detach old LVs from the drbd device
8077
        1. rename old LVs to name_replaced.<time_t>
8078
        1. rename new LVs to old LVs
8079
        1. attach the new LVs (with the old names now) to the drbd device
8080

8081
      1. wait for sync across all devices
8082

8083
      1. for each modified disk:
8084

8085
        1. remove old LVs (which have the name name_replaces.<time_t>)
8086

8087
    Failures are not very well handled.
8088

8089
    """
8090
    steps_total = 6
8091

    
8092
    # Step: check device activation
8093
    self.lu.LogStep(1, steps_total, "Check device existence")
8094
    self._CheckDisksExistence([self.other_node, self.target_node])
8095
    self._CheckVolumeGroup([self.target_node, self.other_node])
8096

    
8097
    # Step: check other node consistency
8098
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8099
    self._CheckDisksConsistency(self.other_node,
8100
                                self.other_node == self.instance.primary_node,
8101
                                False)
8102

    
8103
    # Step: create new storage
8104
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8105
    iv_names = self._CreateNewStorage(self.target_node)
8106

    
8107
    # Step: for each lv, detach+rename*2+attach
8108
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8109
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8110
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8111

    
8112
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8113
                                                     old_lvs)
8114
      result.Raise("Can't detach drbd from local storage on node"
8115
                   " %s for device %s" % (self.target_node, dev.iv_name))
8116
      #dev.children = []
8117
      #cfg.Update(instance)
8118

    
8119
      # ok, we created the new LVs, so now we know we have the needed
8120
      # storage; as such, we proceed on the target node to rename
8121
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8122
      # using the assumption that logical_id == physical_id (which in
8123
      # turn is the unique_id on that node)
8124

    
8125
      # FIXME(iustin): use a better name for the replaced LVs
8126
      temp_suffix = int(time.time())
8127
      ren_fn = lambda d, suff: (d.physical_id[0],
8128
                                d.physical_id[1] + "_replaced-%s" % suff)
8129

    
8130
      # Build the rename list based on what LVs exist on the node
8131
      rename_old_to_new = []
8132
      for to_ren in old_lvs:
8133
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8134
        if not result.fail_msg and result.payload:
8135
          # device exists
8136
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8137

    
8138
      self.lu.LogInfo("Renaming the old LVs on the target node")
8139
      result = self.rpc.call_blockdev_rename(self.target_node,
8140
                                             rename_old_to_new)
8141
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8142

    
8143
      # Now we rename the new LVs to the old LVs
8144
      self.lu.LogInfo("Renaming the new LVs on the target node")
8145
      rename_new_to_old = [(new, old.physical_id)
8146
                           for old, new in zip(old_lvs, new_lvs)]
8147
      result = self.rpc.call_blockdev_rename(self.target_node,
8148
                                             rename_new_to_old)
8149
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8150

    
8151
      for old, new in zip(old_lvs, new_lvs):
8152
        new.logical_id = old.logical_id
8153
        self.cfg.SetDiskID(new, self.target_node)
8154

    
8155
      for disk in old_lvs:
8156
        disk.logical_id = ren_fn(disk, temp_suffix)
8157
        self.cfg.SetDiskID(disk, self.target_node)
8158

    
8159
      # Now that the new lvs have the old name, we can add them to the device
8160
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8161
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8162
                                                  new_lvs)
8163
      msg = result.fail_msg
8164
      if msg:
8165
        for new_lv in new_lvs:
8166
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8167
                                               new_lv).fail_msg
8168
          if msg2:
8169
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8170
                               hint=("cleanup manually the unused logical"
8171
                                     "volumes"))
8172
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8173

    
8174
      dev.children = new_lvs
8175

    
8176
      self.cfg.Update(self.instance, feedback_fn)
8177

    
8178
    cstep = 5
8179
    if self.early_release:
8180
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8181
      cstep += 1
8182
      self._RemoveOldStorage(self.target_node, iv_names)
8183
      # WARNING: we release both node locks here, do not do other RPCs
8184
      # than WaitForSync to the primary node
8185
      self._ReleaseNodeLock([self.target_node, self.other_node])
8186

    
8187
    # Wait for sync
8188
    # This can fail as the old devices are degraded and _WaitForSync
8189
    # does a combined result over all disks, so we don't check its return value
8190
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8191
    cstep += 1
8192
    _WaitForSync(self.lu, self.instance)
8193

    
8194
    # Check all devices manually
8195
    self._CheckDevices(self.instance.primary_node, iv_names)
8196

    
8197
    # Step: remove old storage
8198
    if not self.early_release:
8199
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8200
      cstep += 1
8201
      self._RemoveOldStorage(self.target_node, iv_names)
8202

    
8203
  def _ExecDrbd8Secondary(self, feedback_fn):
8204
    """Replace the secondary node for DRBD 8.
8205

8206
    The algorithm for replace is quite complicated:
8207
      - for all disks of the instance:
8208
        - create new LVs on the new node with same names
8209
        - shutdown the drbd device on the old secondary
8210
        - disconnect the drbd network on the primary
8211
        - create the drbd device on the new secondary
8212
        - network attach the drbd on the primary, using an artifice:
8213
          the drbd code for Attach() will connect to the network if it
8214
          finds a device which is connected to the good local disks but
8215
          not network enabled
8216
      - wait for sync across all devices
8217
      - remove all disks from the old secondary
8218

8219
    Failures are not very well handled.
8220

8221
    """
8222
    steps_total = 6
8223

    
8224
    # Step: check device activation
8225
    self.lu.LogStep(1, steps_total, "Check device existence")
8226
    self._CheckDisksExistence([self.instance.primary_node])
8227
    self._CheckVolumeGroup([self.instance.primary_node])
8228

    
8229
    # Step: check other node consistency
8230
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8231
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8232

    
8233
    # Step: create new storage
8234
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8235
    for idx, dev in enumerate(self.instance.disks):
8236
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8237
                      (self.new_node, idx))
8238
      # we pass force_create=True to force LVM creation
8239
      for new_lv in dev.children:
8240
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8241
                        _GetInstanceInfoText(self.instance), False)
8242

    
8243
    # Step 4: dbrd minors and drbd setups changes
8244
    # after this, we must manually remove the drbd minors on both the
8245
    # error and the success paths
8246
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8247
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8248
                                         for dev in self.instance.disks],
8249
                                        self.instance.name)
8250
    logging.debug("Allocated minors %r", minors)
8251

    
8252
    iv_names = {}
8253
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8254
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8255
                      (self.new_node, idx))
8256
      # create new devices on new_node; note that we create two IDs:
8257
      # one without port, so the drbd will be activated without
8258
      # networking information on the new node at this stage, and one
8259
      # with network, for the latter activation in step 4
8260
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8261
      if self.instance.primary_node == o_node1:
8262
        p_minor = o_minor1
8263
      else:
8264
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8265
        p_minor = o_minor2
8266

    
8267
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8268
                      p_minor, new_minor, o_secret)
8269
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8270
                    p_minor, new_minor, o_secret)
8271

    
8272
      iv_names[idx] = (dev, dev.children, new_net_id)
8273
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8274
                    new_net_id)
8275
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8276
                              logical_id=new_alone_id,
8277
                              children=dev.children,
8278
                              size=dev.size)
8279
      try:
8280
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8281
                              _GetInstanceInfoText(self.instance), False)
8282
      except errors.GenericError:
8283
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8284
        raise
8285

    
8286
    # We have new devices, shutdown the drbd on the old secondary
8287
    for idx, dev in enumerate(self.instance.disks):
8288
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8289
      self.cfg.SetDiskID(dev, self.target_node)
8290
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8291
      if msg:
8292
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8293
                           "node: %s" % (idx, msg),
8294
                           hint=("Please cleanup this device manually as"
8295
                                 " soon as possible"))
8296

    
8297
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8298
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8299
                                               self.node_secondary_ip,
8300
                                               self.instance.disks)\
8301
                                              [self.instance.primary_node]
8302

    
8303
    msg = result.fail_msg
8304
    if msg:
8305
      # detaches didn't succeed (unlikely)
8306
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8307
      raise errors.OpExecError("Can't detach the disks from the network on"
8308
                               " old node: %s" % (msg,))
8309

    
8310
    # if we managed to detach at least one, we update all the disks of
8311
    # the instance to point to the new secondary
8312
    self.lu.LogInfo("Updating instance configuration")
8313
    for dev, _, new_logical_id in iv_names.itervalues():
8314
      dev.logical_id = new_logical_id
8315
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8316

    
8317
    self.cfg.Update(self.instance, feedback_fn)
8318

    
8319
    # and now perform the drbd attach
8320
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8321
                    " (standalone => connected)")
8322
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8323
                                            self.new_node],
8324
                                           self.node_secondary_ip,
8325
                                           self.instance.disks,
8326
                                           self.instance.name,
8327
                                           False)
8328
    for to_node, to_result in result.items():
8329
      msg = to_result.fail_msg
8330
      if msg:
8331
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8332
                           to_node, msg,
8333
                           hint=("please do a gnt-instance info to see the"
8334
                                 " status of disks"))
8335
    cstep = 5
8336
    if self.early_release:
8337
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8338
      cstep += 1
8339
      self._RemoveOldStorage(self.target_node, iv_names)
8340
      # WARNING: we release all node locks here, do not do other RPCs
8341
      # than WaitForSync to the primary node
8342
      self._ReleaseNodeLock([self.instance.primary_node,
8343
                             self.target_node,
8344
                             self.new_node])
8345

    
8346
    # Wait for sync
8347
    # This can fail as the old devices are degraded and _WaitForSync
8348
    # does a combined result over all disks, so we don't check its return value
8349
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8350
    cstep += 1
8351
    _WaitForSync(self.lu, self.instance)
8352

    
8353
    # Check all devices manually
8354
    self._CheckDevices(self.instance.primary_node, iv_names)
8355

    
8356
    # Step: remove old storage
8357
    if not self.early_release:
8358
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8359
      self._RemoveOldStorage(self.target_node, iv_names)
8360

    
8361

    
8362
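
# Illustrative sketch (not part of the tasklet above): given a DRBD8
# disk's six-tuple logical_id, the instance's primary node and the
# newly allocated minor on the replacement secondary, compute the
# "standalone" logical_id (port set to None so the device activates
# without networking) and the networked logical_id used once the
# primary re-attaches.  This mirrors the tuple handling in
# _ExecDrbd8Secondary; the function name is made up for illustration.
def _ExampleComputeNewDrbdIds(primary_node, new_node, new_minor,
                              old_logical_id):
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  new_alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  new_net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return (new_alone_id, new_net_id)

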
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("ignore_consistency", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

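
# Minimal sketch of the check done in LURepairNodeStorage.CheckArguments
# above: a storage type is repairable only if the fix-consistency
# operation is listed among its valid operations.  The plain-string
# default below is an illustrative stand-in, not the real constant.
def _ExampleCanRepairStorage(storage_type, valid_operations,
                             fix_op="fix-consistency"):
  return fix_op in valid_operations.get(storage_type, [])

# e.g. _ExampleCanRepairStorage("lvm-vg", {"lvm-vg": ["fix-consistency"]})
# returns True, while any storage type not in the map returns False.

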
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result

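
# Illustrative helper, not used by LUNodeEvacuationStrategy above: build
# the manual evacuation plan for the remote_node case, i.e. a list of
# [instance_name, new_secondary] pairs, rejecting instances whose
# primary node is the proposed new secondary (mirroring the
# OpPrereqError raised in Exec).
def _ExampleManualEvacuationPlan(secondary_instances, remote_node):
  plan = []
  for (name, primary_node) in secondary_instances:
    if primary_node == remote_node:
      raise ValueError("Node %s is the primary node of instance %s,"
                       " cannot use it as secondary" % (remote_node, name))
    plan.append([name, remote_node])
  return plan

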
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", ht.NoDefault, ht.TInt),
    ("amount", ht.NoDefault, ht.TInt),
    ("wait_for_sync", True, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")

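
# Rough sketch (illustration only) of the post-grow behaviour in
# LUGrowDisk.Exec above: returns (wait_for_resync, shutdown_disk_again)
# for a given wait_for_sync setting and the instance's admin_up state.
# With wait_for_sync the disk is shut back down afterwards if the
# instance is not supposed to be running; without it the disk is left
# activated and only a warning is emitted.
def _ExampleGrowFollowUp(wait_for_sync, admin_up):
  if wait_for_sync:
    return (True, not admin_up)
  return (False, False)

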
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_PARAMS = [
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("static", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result

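
# Illustrative only: the "pstatus"/"sstatus" entries produced by
# _ComputeBlockdevStatus above are 7-tuples (dev_path, major, minor,
# sync_percent, estimated_time, is_degraded, ldisk_status), or None for
# static queries and offline nodes.  A consumer might render them like
# this; the function is a sketch, not part of the LU.
def _ExampleFormatBlockdevStatus(status):
  if status is None:
    return "unknown (static query or offline node)"
  (dev_path, major, minor, sync_percent,
   _estimated_time, is_degraded, _ldisk_status) = status
  text = "%s (%s:%s)" % (dev_path, major, minor)
  if sync_percent is not None:
    text += ", sync %.1f%%" % sync_percent
  if is_degraded:
    text += ", DEGRADED"
  return text

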
class LUSetInstanceParams(LogicalUnit):
8739
  """Modifies an instances's parameters.
8740

8741
  """
8742
  HPATH = "instance-modify"
8743
  HTYPE = constants.HTYPE_INSTANCE
8744
  _OP_PARAMS = [
8745
    _PInstanceName,
8746
    ("nics", ht.EmptyList, ht.TList),
8747
    ("disks", ht.EmptyList, ht.TList),
8748
    ("beparams", ht.EmptyDict, ht.TDict),
8749
    ("hvparams", ht.EmptyDict, ht.TDict),
8750
    ("disk_template", None, ht.TMaybeString),
8751
    ("remote_node", None, ht.TMaybeString),
8752
    ("os_name", None, ht.TMaybeString),
8753
    ("force_variant", False, ht.TBool),
8754
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8755
    _PForce,
8756
    ]
8757
  REQ_BGL = False
8758

    
8759
  def CheckArguments(self):
8760
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8761
            self.op.hvparams or self.op.beparams or self.op.os_name):
8762
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8763

    
8764
    if self.op.hvparams:
8765
      _CheckGlobalHvParams(self.op.hvparams)
8766

    
8767
    # Disk validation
8768
    disk_addremove = 0
8769
    for disk_op, disk_dict in self.op.disks:
8770
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8771
      if disk_op == constants.DDM_REMOVE:
8772
        disk_addremove += 1
8773
        continue
8774
      elif disk_op == constants.DDM_ADD:
8775
        disk_addremove += 1
8776
      else:
8777
        if not isinstance(disk_op, int):
8778
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8779
        if not isinstance(disk_dict, dict):
8780
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8781
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8782

    
8783
      if disk_op == constants.DDM_ADD:
8784
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8785
        if mode not in constants.DISK_ACCESS_SET:
8786
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8787
                                     errors.ECODE_INVAL)
8788
        size = disk_dict.get('size', None)
8789
        if size is None:
8790
          raise errors.OpPrereqError("Required disk parameter size missing",
8791
                                     errors.ECODE_INVAL)
8792
        try:
8793
          size = int(size)
8794
        except (TypeError, ValueError), err:
8795
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8796
                                     str(err), errors.ECODE_INVAL)
8797
        disk_dict['size'] = size
8798
      else:
8799
        # modification of disk
8800
        if 'size' in disk_dict:
8801
          raise errors.OpPrereqError("Disk size change not possible, use"
8802
                                     " grow-disk", errors.ECODE_INVAL)
8803

    
8804
    if disk_addremove > 1:
8805
      raise errors.OpPrereqError("Only one disk add or remove operation"
8806
                                 " supported at a time", errors.ECODE_INVAL)
8807

    
8808
    if self.op.disks and self.op.disk_template is not None:
8809
      raise errors.OpPrereqError("Disk template conversion and other disk"
8810
                                 " changes not supported at the same time",
8811
                                 errors.ECODE_INVAL)
8812

    
8813
    if self.op.disk_template:
8814
      _CheckDiskTemplate(self.op.disk_template)
8815
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8816
          self.op.remote_node is None):
8817
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8818
                                   " one requires specifying a secondary node",
8819
                                   errors.ECODE_INVAL)
8820

    
8821
    # NIC validation
8822
    nic_addremove = 0
8823
    for nic_op, nic_dict in self.op.nics:
8824
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8825
      if nic_op == constants.DDM_REMOVE:
8826
        nic_addremove += 1
8827
        continue
8828
      elif nic_op == constants.DDM_ADD:
8829
        nic_addremove += 1
8830
      else:
8831
        if not isinstance(nic_op, int):
8832
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8833
        if not isinstance(nic_dict, dict):
8834
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8835
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8836

    
8837
      # nic_dict should be a dict
8838
      nic_ip = nic_dict.get('ip', None)
8839
      if nic_ip is not None:
8840
        if nic_ip.lower() == constants.VALUE_NONE:
8841
          nic_dict['ip'] = None
8842
        else:
8843
          if not netutils.IPAddress.IsValid(nic_ip):
8844
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8845
                                       errors.ECODE_INVAL)
8846

    
8847
      nic_bridge = nic_dict.get('bridge', None)
8848
      nic_link = nic_dict.get('link', None)
8849
      if nic_bridge and nic_link:
8850
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8851
                                   " at the same time", errors.ECODE_INVAL)
8852
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8853
        nic_dict['bridge'] = None
8854
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8855
        nic_dict['link'] = None
8856

    
8857
      if nic_op == constants.DDM_ADD:
8858
        nic_mac = nic_dict.get('mac', None)
8859
        if nic_mac is None:
8860
          nic_dict['mac'] = constants.VALUE_AUTO
8861

    
8862
      if 'mac' in nic_dict:
8863
        nic_mac = nic_dict['mac']
8864
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8865
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8866

    
8867
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8868
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8869
                                     " modifying an existing nic",
8870
                                     errors.ECODE_INVAL)
8871

    
8872
    if nic_addremove > 1:
8873
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8874
                                 " supported at a time", errors.ECODE_INVAL)
8875

    
8876
  def ExpandNames(self):
8877
    self._ExpandAndLockInstance()
8878
    self.needed_locks[locking.LEVEL_NODE] = []
8879
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8880

    
8881
  def DeclareLocks(self, level):
8882
    if level == locking.LEVEL_NODE:
8883
      self._LockInstancesNodes()
8884
      if self.op.disk_template and self.op.remote_node:
8885
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8886
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8887

    
8888
  def BuildHooksEnv(self):
8889
    """Build hooks env.
8890

8891
    This runs on the master, primary and secondaries.
8892

8893
    """
8894
    args = dict()
8895
    if constants.BE_MEMORY in self.be_new:
8896
      args['memory'] = self.be_new[constants.BE_MEMORY]
8897
    if constants.BE_VCPUS in self.be_new:
8898
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8899
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8900
    # information at all.
8901
    if self.op.nics:
8902
      args['nics'] = []
8903
      nic_override = dict(self.op.nics)
8904
      for idx, nic in enumerate(self.instance.nics):
8905
        if idx in nic_override:
8906
          this_nic_override = nic_override[idx]
8907
        else:
8908
          this_nic_override = {}
8909
        if 'ip' in this_nic_override:
8910
          ip = this_nic_override['ip']
8911
        else:
8912
          ip = nic.ip
8913
        if 'mac' in this_nic_override:
8914
          mac = this_nic_override['mac']
8915
        else:
8916
          mac = nic.mac
8917
        if idx in self.nic_pnew:
8918
          nicparams = self.nic_pnew[idx]
8919
        else:
8920
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8921
        mode = nicparams[constants.NIC_MODE]
8922
        link = nicparams[constants.NIC_LINK]
8923
        args['nics'].append((ip, mac, mode, link))
8924
      if constants.DDM_ADD in nic_override:
8925
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8926
        mac = nic_override[constants.DDM_ADD]['mac']
8927
        nicparams = self.nic_pnew[constants.DDM_ADD]
8928
        mode = nicparams[constants.NIC_MODE]
8929
        link = nicparams[constants.NIC_LINK]
8930
        args['nics'].append((ip, mac, mode, link))
8931
      elif constants.DDM_REMOVE in nic_override:
8932
        del args['nics'][-1]
8933

    
8934
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8935
    if self.op.disk_template:
8936
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8937
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8938
    return env, nl, nl
8939

    
8940
  def CheckPrereq(self):
8941
    """Check prerequisites.
8942

8943
    This only checks the instance list against the existing names.
8944

8945
    """
8946
    # checking the new params on the primary/secondary nodes
8947

    
8948
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8949
    cluster = self.cluster = self.cfg.GetClusterInfo()
8950
    assert self.instance is not None, \
8951
      "Cannot retrieve locked instance %s" % self.op.instance_name
8952
    pnode = instance.primary_node
8953
    nodelist = list(instance.all_nodes)
8954

    
8955
    # OS change
8956
    if self.op.os_name and not self.op.force:
8957
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8958
                      self.op.force_variant)
8959
      instance_os = self.op.os_name
8960
    else:
8961
      instance_os = instance.os
8962

    
8963
    if self.op.disk_template:
8964
      if instance.disk_template == self.op.disk_template:
8965
        raise errors.OpPrereqError("Instance already has disk template %s" %
8966
                                   instance.disk_template, errors.ECODE_INVAL)
8967

    
8968
      if (instance.disk_template,
8969
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8970
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8971
                                   " %s to %s" % (instance.disk_template,
8972
                                                  self.op.disk_template),
8973
                                   errors.ECODE_INVAL)
8974
      _CheckInstanceDown(self, instance, "cannot change disk template")
8975
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8976
        if self.op.remote_node == pnode:
8977
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8978
                                     " as the primary node of the instance" %
8979
                                     self.op.remote_node, errors.ECODE_STATE)
8980
        _CheckNodeOnline(self, self.op.remote_node)
8981
        _CheckNodeNotDrained(self, self.op.remote_node)
8982
        disks = [{"size": d.size} for d in instance.disks]
8983
        required = _ComputeDiskSize(self.op.disk_template, disks)
8984
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8985

    
8986
    # hvparams processing
8987
    if self.op.hvparams:
8988
      hv_type = instance.hypervisor
8989
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8990
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8991
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8992

    
8993
      # local check
8994
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8995
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8996
      self.hv_new = hv_new # the new actual values
8997
      self.hv_inst = i_hvdict # the new dict (without defaults)
8998
    else:
8999
      self.hv_new = self.hv_inst = {}
9000

    
9001
    # beparams processing
9002
    if self.op.beparams:
9003
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9004
                                   use_none=True)
9005
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9006
      be_new = cluster.SimpleFillBE(i_bedict)
9007
      self.be_new = be_new # the new actual values
9008
      self.be_inst = i_bedict # the new dict (without defaults)
9009
    else:
9010
      self.be_new = self.be_inst = {}
9011

    
9012
    # osparams processing
9013
    if self.op.osparams:
9014
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9015
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9016
      self.os_inst = i_osdict # the new dict (without defaults)
9017
    else:
9018
      self.os_inst = {}
9019

    
9020
    self.warn = []
9021

    
9022
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9023
      mem_check_list = [pnode]
9024
      if be_new[constants.BE_AUTO_BALANCE]:
9025
        # either we changed auto_balance to yes or it was from before
9026
        mem_check_list.extend(instance.secondary_nodes)
9027
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9028
                                                  instance.hypervisor)
9029
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9030
                                         instance.hypervisor)
9031
      pninfo = nodeinfo[pnode]
9032
      msg = pninfo.fail_msg
9033
      if msg:
9034
        # Assume the primary node is unreachable and go ahead
9035
        self.warn.append("Can't get info from primary node %s: %s" %
9036
                         (pnode,  msg))
9037
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9038
        self.warn.append("Node data from primary node %s doesn't contain"
9039
                         " free memory information" % pnode)
9040
      elif instance_info.fail_msg:
9041
        self.warn.append("Can't get instance runtime information: %s" %
9042
                        instance_info.fail_msg)
9043
      else:
9044
        if instance_info.payload:
9045
          current_mem = int(instance_info.payload['memory'])
9046
        else:
9047
          # Assume instance not running
9048
          # (there is a slight race condition here, but it's not very probable,
9049
          # and we have no other way to check)
9050
          current_mem = 0
9051
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9052
                    pninfo.payload['memory_free'])
9053
        if miss_mem > 0:
9054
          raise errors.OpPrereqError("This change will prevent the instance"
9055
                                     " from starting, due to %d MB of memory"
9056
                                     " missing on its primary node" % miss_mem,
9057
                                     errors.ECODE_NORES)
9058

    
9059
      if be_new[constants.BE_AUTO_BALANCE]:
9060
        for node, nres in nodeinfo.items():
9061
          if node not in instance.secondary_nodes:
9062
            continue
9063
          msg = nres.fail_msg
9064
          if msg:
9065
            self.warn.append("Can't get info from secondary node %s: %s" %
9066
                             (node, msg))
9067
          elif not isinstance(nres.payload.get('memory_free', None), int):
9068
            self.warn.append("Secondary node %s didn't return free"
9069
                             " memory information" % node)
9070
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9071
            self.warn.append("Not enough memory to failover instance to"
9072
                             " secondary node %s" % node)
9073

    
9074
    # NIC processing
9075
    self.nic_pnew = {}
9076
    self.nic_pinst = {}
9077
    for nic_op, nic_dict in self.op.nics:
9078
      if nic_op == constants.DDM_REMOVE:
9079
        if not instance.nics:
9080
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9081
                                     errors.ECODE_INVAL)
9082
        continue
9083
      if nic_op != constants.DDM_ADD:
9084
        # an existing nic
9085
        if not instance.nics:
9086
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9087
                                     " no NICs" % nic_op,
9088
                                     errors.ECODE_INVAL)
9089
        if nic_op < 0 or nic_op >= len(instance.nics):
9090
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9091
                                     " are 0 to %d" %
9092
                                     (nic_op, len(instance.nics) - 1),
9093
                                     errors.ECODE_INVAL)
9094
        old_nic_params = instance.nics[nic_op].nicparams
9095
        old_nic_ip = instance.nics[nic_op].ip
9096
      else:
9097
        old_nic_params = {}
9098
        old_nic_ip = None
9099

    
9100
      update_params_dict = dict([(key, nic_dict[key])
9101
                                 for key in constants.NICS_PARAMETERS
9102
                                 if key in nic_dict])
9103

    
9104
      if 'bridge' in nic_dict:
9105
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9106

    
9107
      new_nic_params = _GetUpdatedParams(old_nic_params,
9108
                                         update_params_dict)
9109
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9110
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9111
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9112
      self.nic_pinst[nic_op] = new_nic_params
9113
      self.nic_pnew[nic_op] = new_filled_nic_params
9114
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9115

    
9116
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9117
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9118
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9119
        if msg:
9120
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9121
          if self.op.force:
9122
            self.warn.append(msg)
9123
          else:
9124
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9125
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9126
        if 'ip' in nic_dict:
9127
          nic_ip = nic_dict['ip']
9128
        else:
9129
          nic_ip = old_nic_ip
9130
        if nic_ip is None:
9131
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9132
                                     ' on a routed nic', errors.ECODE_INVAL)
9133
      if 'mac' in nic_dict:
9134
        nic_mac = nic_dict['mac']
9135
        if nic_mac is None:
9136
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9137
                                     errors.ECODE_INVAL)
9138
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9139
          # otherwise generate the mac
9140
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9141
        else:
9142
          # or validate/reserve the current one
9143
          try:
9144
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9145
          except errors.ReservationError:
9146
            raise errors.OpPrereqError("MAC address %s already in use"
9147
                                       " in cluster" % nic_mac,
9148
                                       errors.ECODE_NOTUNIQUE)
9149

    
9150
    # DISK processing
9151
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9152
      raise errors.OpPrereqError("Disk operations not supported for"
9153
                                 " diskless instances",
9154
                                 errors.ECODE_INVAL)
9155
    for disk_op, _ in self.op.disks:
9156
      if disk_op == constants.DDM_REMOVE:
9157
        if len(instance.disks) == 1:
9158
          raise errors.OpPrereqError("Cannot remove the last disk of"
9159
                                     " an instance", errors.ECODE_INVAL)
9160
        _CheckInstanceDown(self, instance, "cannot remove disks")
9161

    
9162
      if (disk_op == constants.DDM_ADD and
9163
          len(instance.disks) >= constants.MAX_DISKS):
9164
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9165
                                   " add more" % constants.MAX_DISKS,
9166
                                   errors.ECODE_STATE)
9167
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9168
        # an existing disk
9169
        if disk_op < 0 or disk_op >= len(instance.disks):
9170
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9171
                                     " are 0 to %d" %
9172
                                     (disk_op, len(instance.disks)),
9173
                                     errors.ECODE_INVAL)
9174

    
9175
    return
9176

    
9177
  def _ConvertPlainToDrbd(self, feedback_fn):
9178
    """Converts an instance from plain to drbd.
9179

9180
    """
9181
    feedback_fn("Converting template to drbd")
9182
    instance = self.instance
9183
    pnode = instance.primary_node
9184
    snode = self.op.remote_node
9185

    
9186
    # create a fake disk info for _GenerateDiskTemplate
9187
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9188
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9189
                                      instance.name, pnode, [snode],
9190
                                      disk_info, None, None, 0)
9191
    info = _GetInstanceInfoText(instance)
9192
    feedback_fn("Creating aditional volumes...")
9193
    # first, create the missing data and meta devices
9194
    for disk in new_disks:
9195
      # unfortunately this is... not too nice
9196
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9197
                            info, True)
9198
      for child in disk.children:
9199
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9200
    # at this stage, all new LVs have been created, we can rename the
9201
    # old ones
9202
    feedback_fn("Renaming original volumes...")
9203
    rename_list = [(o, n.children[0].logical_id)
9204
                   for (o, n) in zip(instance.disks, new_disks)]
9205
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9206
    result.Raise("Failed to rename original LVs")
9207

    
9208
    feedback_fn("Initializing DRBD devices...")
9209
    # all child devices are in place, we can now create the DRBD devices
9210
    for disk in new_disks:
9211
      for node in [pnode, snode]:
9212
        f_create = node == pnode
9213
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9214

    
9215
    # at this point, the instance has been modified
9216
    instance.disk_template = constants.DT_DRBD8
9217
    instance.disks = new_disks
9218
    self.cfg.Update(instance, feedback_fn)
9219

    
9220
    # disks are created, waiting for sync
9221
    disk_abort = not _WaitForSync(self, instance)
9222
    if disk_abort:
9223
      raise errors.OpExecError("There are some degraded disks for"
9224
                               " this instance, please cleanup manually")
9225

    
9226
  def _ConvertDrbdToPlain(self, feedback_fn):
9227
    """Converts an instance from drbd to plain.
9228

9229
    """
9230
    instance = self.instance
9231
    assert len(instance.secondary_nodes) == 1
9232
    pnode = instance.primary_node
9233
    snode = instance.secondary_nodes[0]
9234
    feedback_fn("Converting template to plain")
9235

    
9236
    old_disks = instance.disks
9237
    new_disks = [d.children[0] for d in old_disks]
9238

    
9239
    # copy over size and mode
9240
    for parent, child in zip(old_disks, new_disks):
9241
      child.size = parent.size
9242
      child.mode = parent.mode
9243

    
9244
    # update instance structure
9245
    instance.disks = new_disks
9246
    instance.disk_template = constants.DT_PLAIN
9247
    self.cfg.Update(instance, feedback_fn)
9248

    
9249
    feedback_fn("Removing volumes on the secondary node...")
9250
    for disk in old_disks:
9251
      self.cfg.SetDiskID(disk, snode)
9252
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9253
      if msg:
9254
        self.LogWarning("Could not remove block device %s on node %s,"
9255
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9256

    
9257
    feedback_fn("Removing unneeded volumes on the primary node...")
9258
    for idx, disk in enumerate(old_disks):
9259
      meta = disk.children[1]
9260
      self.cfg.SetDiskID(meta, pnode)
9261
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9262
      if msg:
9263
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9264
                        " continuing anyway: %s", idx, pnode, msg)
9265

    
9266

    
9267
  def Exec(self, feedback_fn):
9268
    """Modifies an instance.
9269

9270
    All parameters take effect only at the next restart of the instance.
9271

9272
    """
9273
    # Process here the warnings from CheckPrereq, as we don't have a
9274
    # feedback_fn there.
9275
    for warn in self.warn:
9276
      feedback_fn("WARNING: %s" % warn)
9277

    
9278
    result = []
9279
    instance = self.instance
9280
    # disk changes
9281
    for disk_op, disk_dict in self.op.disks:
9282
      if disk_op == constants.DDM_REMOVE:
9283
        # remove the last disk
9284
        device = instance.disks.pop()
9285
        device_idx = len(instance.disks)
9286
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9287
          self.cfg.SetDiskID(disk, node)
9288
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9289
          if msg:
9290
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9291
                            " continuing anyway", device_idx, node, msg)
9292
        result.append(("disk/%d" % device_idx, "remove"))
9293
      elif disk_op == constants.DDM_ADD:
9294
        # add a new disk
9295
        if instance.disk_template == constants.DT_FILE:
9296
          file_driver, file_path = instance.disks[0].logical_id
9297
          file_path = os.path.dirname(file_path)
9298
        else:
9299
          file_driver = file_path = None
9300
        disk_idx_base = len(instance.disks)
9301
        new_disk = _GenerateDiskTemplate(self,
9302
                                         instance.disk_template,
9303
                                         instance.name, instance.primary_node,
9304
                                         instance.secondary_nodes,
9305
                                         [disk_dict],
9306
                                         file_path,
9307
                                         file_driver,
9308
                                         disk_idx_base)[0]
9309
        instance.disks.append(new_disk)
9310
        info = _GetInstanceInfoText(instance)
9311

    
9312
        logging.info("Creating volume %s for instance %s",
9313
                     new_disk.iv_name, instance.name)
9314
        # Note: this needs to be kept in sync with _CreateDisks
9315
        #HARDCODE
9316
        for node in instance.all_nodes:
9317
          f_create = node == instance.primary_node
9318
          try:
9319
            _CreateBlockDev(self, node, instance, new_disk,
9320
                            f_create, info, f_create)
9321
          except errors.OpExecError, err:
9322
            self.LogWarning("Failed to create volume %s (%s) on"
9323
                            " node %s: %s",
9324
                            new_disk.iv_name, new_disk, node, err)
9325
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9326
                       (new_disk.size, new_disk.mode)))
9327
      else:
9328
        # change a given disk
9329
        instance.disks[disk_op].mode = disk_dict['mode']
9330
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9331

    
9332
    if self.op.disk_template:
9333
      r_shut = _ShutdownInstanceDisks(self, instance)
9334
      if not r_shut:
9335
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9336
                                 " proceed with disk template conversion")
9337
      mode = (instance.disk_template, self.op.disk_template)
9338
      try:
9339
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9340
      except:
9341
        self.cfg.ReleaseDRBDMinors(instance.name)
9342
        raise
9343
      result.append(("disk_template", self.op.disk_template))
9344

    
9345
    # NIC changes
9346
    for nic_op, nic_dict in self.op.nics:
9347
      if nic_op == constants.DDM_REMOVE:
9348
        # remove the last nic
9349
        del instance.nics[-1]
9350
        result.append(("nic.%d" % len(instance.nics), "remove"))
9351
      elif nic_op == constants.DDM_ADD:
9352
        # mac and bridge should be set, by now
9353
        mac = nic_dict['mac']
9354
        ip = nic_dict.get('ip', None)
9355
        nicparams = self.nic_pinst[constants.DDM_ADD]
9356
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9357
        instance.nics.append(new_nic)
9358
        result.append(("nic.%d" % (len(instance.nics) - 1),
9359
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9360
                       (new_nic.mac, new_nic.ip,
9361
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9362
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9363
                       )))
9364
      else:
9365
        for key in 'mac', 'ip':
9366
          if key in nic_dict:
9367
            setattr(instance.nics[nic_op], key, nic_dict[key])
9368
        if nic_op in self.nic_pinst:
9369
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9370
        for key, val in nic_dict.iteritems():
9371
          result.append(("nic.%s/%d" % (key, nic_op), val))
9372

    
9373
    # hvparams changes
9374
    if self.op.hvparams:
9375
      instance.hvparams = self.hv_inst
9376
      for key, val in self.op.hvparams.iteritems():
9377
        result.append(("hv/%s" % key, val))
9378

    
9379
    # beparams changes
9380
    if self.op.beparams:
9381
      instance.beparams = self.be_inst
9382
      for key, val in self.op.beparams.iteritems():
9383
        result.append(("be/%s" % key, val))
9384

    
9385
    # OS change
9386
    if self.op.os_name:
9387
      instance.os = self.op.os_name
9388

    
9389
    # osparams changes
9390
    if self.op.osparams:
9391
      instance.osparams = self.os_inst
9392
      for key, val in self.op.osparams.iteritems():
9393
        result.append(("os/%s" % key, val))
9394

    
9395
    self.cfg.Update(instance, feedback_fn)
9396

    
9397
    return result
9398

    
9399
  _DISK_CONVERSIONS = {
9400
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9401
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9402
    }
9403

    
9404

    
9405
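
# Illustrative sketch: LUSetInstanceParams only supports the disk
# template conversions listed in its _DISK_CONVERSIONS map above
# (plain <-> drbd); anything else is rejected in CheckPrereq.  The
# plain strings below stand in for the constants.DT_* values and are
# assumptions for illustration only.
def _ExampleConversionSupported(old_template, new_template,
                                supported=(("plain", "drbd"),
                                           ("drbd", "plain"))):
  return (old_template, new_template) in supported

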
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result

9446
  """Prepares an instance for an export and returns useful information.
9447

9448
  """
9449
  _OP_PARAMS = [
9450
    _PInstanceName,
9451
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9452
    ]
9453
  REQ_BGL = False
9454

    
9455
  def ExpandNames(self):
9456
    self._ExpandAndLockInstance()
9457

    
9458
  def CheckPrereq(self):
9459
    """Check prerequisites.
9460

9461
    """
9462
    instance_name = self.op.instance_name
9463

    
9464
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9465
    assert self.instance is not None, \
9466
          "Cannot retrieve locked instance %s" % self.op.instance_name
9467
    _CheckNodeOnline(self, self.instance.primary_node)
9468

    
9469
    self._cds = _GetClusterDomainSecret()
9470

    
9471
  def Exec(self, feedback_fn):
9472
    """Prepares an instance for an export.
9473

9474
    """
9475
    instance = self.instance
9476

    
9477
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9478
      salt = utils.GenerateSecret(8)
9479

    
9480
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9481
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9482
                                              constants.RIE_CERT_VALIDITY)
9483
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9484

    
9485
      (name, cert_pem) = result.payload
9486

    
9487
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9488
                                             cert_pem)
9489

    
9490
      return {
9491
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9492
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9493
                          salt),
9494
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9495
        }
9496

    
9497
    return None
9498

    
9499

    
9500
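
# Minimal sketch, assuming only what LUPrepareExport.Exec above returns:
# for remote exports the caller gets a dict with "handshake",
# "x509_key_name" and "x509_ca"; for local exports it gets None.  The
# helper name is made up for illustration.
def _ExampleIsRemotePrepareResult(prepare_result):
  if prepare_result is None:
    return False
  expected = ("handshake", "x509_key_name", "x509_ca")
  return all(key in prepare_result for key in expected)

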
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
    ("shutdown", True, ht.TBool),
    _PShutdownTimeout,
    ("remove_instance", False, ht.TBool),
    ("ignore_remove_failures", False, ht.TBool),
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
    ("destination_x509_ca", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

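  # Illustrative sketch (not part of the LU): the x509_key_name triple checked
  # in CheckPrereq above is the one produced by LUPrepareExport, i.e. a salted
  # HMAC of the key name keyed with the cluster domain secret.  The variable
  # names are hypothetical:
  #
  #   hmac_salt = utils.GenerateSecret(8)
  #   hmac_digest = utils.Sha1Hmac(cds, key_name, salt=hmac_salt)
  #   # ...later, when the export is requested:
  #   utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt)  # True
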
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shut down the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shut down instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point the export was successful; we can clean up and finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


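# Illustrative sketch (not part of the module): LUSearchTags.Exec returns
# (path, tag) pairs, so a search for the pattern "^web" might yield something
# like the following (names are hypothetical):
#
#   [("/instances/inst1.example.com", "webfarm"),
#    ("/nodes/node2.example.com", "webserver")]

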
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", ht.NoDefault, ht.TFloat),
    ("on_master", True, ht.TBool),
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("repeat", 0, ht.TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


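# Illustrative note (not part of the module): with the semantics above,
# repeat=0 runs the delay exactly once without per-iteration logging, while
# for example a test opcode built roughly like
#
#   OpTestDelay(duration=1.0, on_master=True, on_nodes=[], repeat=3)
#
# runs it three times, logging iterations "0/2" through "2/2".

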
class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, ht.TBool),
    ("notify_exec", False, ht.TBool),
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
    ("fail", False, ht.TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

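  # Illustrative sketch (not part of the LU): the test client that receives
  # the socket path (delivered through the job's log messages, see
  # _SendNotification below) is expected to behave roughly like this, where
  # "path" is whatever cb() handed over:
  #
  #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   client.connect(path)   # unblocks sock.accept() above
  #   # (observe the job here, e.g. wait for further log messages)
  #   client.close()         # unblocks conn.recv(1) above
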
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_data, in_text, out_data, out_text), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = { "name": gdata.name }
    return ng

  @staticmethod
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    """
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    return node_results

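  # Worked example (not part of the class) for the memory adjustment above:
  # an instance with BE_MEMORY=1024 that currently reports 512 MiB in use has
  # i_mem_diff = 1024 - 512 = 512, so 512 MiB are subtracted from the node's
  # 'memory_free' to account for the not-yet-used part of its reservation.
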
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

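  # Illustrative sketch (not part of the class): the request built by
  # _AddNewInstance above for a two-disk DRBD instance would look roughly
  # like this (all values hypothetical):
  #
  #   {"name": "inst1.example.com", "disk_template": "drbd",
  #    "tags": [], "os": "debian-image", "vcpus": 2, "memory": 1024,
  #    "disks": [{"size": 10240, "mode": "w"}, {"size": 2048, "mode": "w"}],
  #    "disk_space_total": <from _ComputeDiskSize>,
  #    "nics": <as supplied by the caller>, "required_nodes": 2}
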
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


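# Illustrative sketch (not part of the module): a well-formed reply from an
# iallocator script, as accepted by _ValidateResult above, is a serialized
# object along these lines (node names hypothetical):
#
#   {"success": true,
#    "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# Older scripts may return the node list under "nodes"; the backwards
# compatibility branch above renames that key to "result".

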
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result