lib/cmdlib.py @ bf4af505

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39

    
40
from ganeti import ssh
41
from ganeti import utils
42
from ganeti import errors
43
from ganeti import hypervisor
44
from ganeti import locking
45
from ganeti import constants
46
from ganeti import objects
47
from ganeti import serializer
48
from ganeti import ssconf
49
from ganeti import uidpool
50
from ganeti import compat
51
from ganeti import masterd
52

    
53
import ganeti.masterd.instance # pylint: disable-msg=W0611
54

    
55

    
56
# Modifiable default values; need to define these here before the
57
# actual LUs
58

    
59
def _EmptyList():
60
  """Returns an empty list.
61

62
  """
63
  return []
64

    
65

    
66
def _EmptyDict():
67
  """Returns an empty dict.
68

69
  """
70
  return {}
71

    
72

    
73
#: The without-default default value
74
_NoDefault = object()
75

    
76

    
77
#: The no-type (value too complex to check in the type system)
78
_NoType = object()
79

    
80

    
81
# Some basic types
82
def _TNotNone(val):
83
  """Checks if the given value is not None.
84

85
  """
86
  return val is not None
87

    
88

    
89
def _TNone(val):
90
  """Checks if the given value is None.
91

92
  """
93
  return val is None
94

    
95

    
96
def _TBool(val):
97
  """Checks if the given value is a boolean.
98

99
  """
100
  return isinstance(val, bool)
101

    
102

    
103
def _TInt(val):
104
  """Checks if the given value is an integer.
105

106
  """
107
  return isinstance(val, int)
108

    
109

    
110
def _TFloat(val):
111
  """Checks if the given value is a float.
112

113
  """
114
  return isinstance(val, float)
115

    
116

    
117
def _TString(val):
118
  """Checks if the given value is a string.
119

120
  """
121
  return isinstance(val, basestring)
122

    
123

    
124
def _TTrue(val):
125
  """Checks if a given value evaluates to a boolean True value.
126

127
  """
128
  return bool(val)
129

    
130

    
131
def _TElemOf(target_list):
132
  """Builds a function that checks if a given value is a member of a list.
133

134
  """
135
  return lambda val: val in target_list
136

    
137

    
138
# Container types
139
def _TList(val):
140
  """Checks if the given value is a list.
141

142
  """
143
  return isinstance(val, list)
144

    
145

    
146
def _TDict(val):
147
  """Checks if the given value is a dictionary.
148

149
  """
150
  return isinstance(val, dict)
151

    
152

    
153
# Combinator types
154
def _TAnd(*args):
155
  """Combine multiple functions using an AND operation.
156

157
  """
158
  def fn(val):
159
    return compat.all(t(val) for t in args)
160
  return fn
161

    
162

    
163
def _TOr(*args):
164
  """Combine multiple functions using an AND operation.
165

166
  """
167
  def fn(val):
168
    return compat.any(t(val) for t in args)
169
  return fn
170

    
171

    
172
# Type aliases
173

    
174
#: a non-empty string
175
_TNonEmptyString = _TAnd(_TString, _TTrue)
176

    
177

    
178
#: a maybe non-empty string
179
_TMaybeString = _TOr(_TNonEmptyString, _TNone)
180

    
181

    
182
#: a maybe boolean (bool or none)
183
_TMaybeBool = _TOr(_TBool, _TNone)
184

    
185

    
186
#: a positive integer
187
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
188

    
189
#: a strictly positive integer
190
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
191

    
192

    
193
def _TListOf(my_type):
194
  """Checks if a given value is a list with all elements of the same type.
195

196
  """
197
  return _TAnd(_TList,
198
               lambda lst: compat.all(my_type(v) for v in lst))
199

    
200

    
201
def _TDictOf(key_type, val_type):
202
  """Checks a dict type for the type of its key/values.
203

204
  """
205
  return _TAnd(_TDict,
206
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
207
                                and compat.all(val_type(v)
208
                                               for v in my_dict.values())))
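# A small illustrative example (not part of the original module) of how the
# container and combinator checks compose; "_TTagsDict" is a made-up name:
#
#   _TTagsDict = _TDictOf(_TNonEmptyString, _TListOf(_TInt))
#   _TTagsDict({"used_ports": [11000, 11001]})  # -> True
#   _TTagsDict({"used_ports": "11000,11001"})   # -> False (value not a list)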
209

    
210

    
211
# Common opcode attributes
212

    
213
#: output fields for a query operation
214
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
215

    
216

    
217
#: the shutdown timeout
218
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
219
                     _TPositiveInt)
220

    
221
#: the force parameter
222
_PForce = ("force", False, _TBool)
223

    
224
#: a required instance name (for single-instance LUs)
225
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
226

    
227

    
228
#: a required node name (for single-node LUs)
229
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
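# Illustrative sketch (not part of the original code): an LU built on the
# helpers above could declare its parameters as follows; "os_type" and
# "hvparams" are just example attribute names:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("os_type", _NoDefault, _TNonEmptyString),
#     ("hvparams", _EmptyDict, _TDictOf(_TNonEmptyString, _TString)),
#     ]
#
# LogicalUnit.__init__ (below) fills in missing attributes from the default
# value (calling it if it is callable, as with _EmptyDict) and raises
# errors.OpPrereqError when a required attribute (_NoDefault) is absent or a
# given attribute fails its type check.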
230

    
231

    
232
# End types
233
class LogicalUnit(object):
234
  """Logical Unit base class.
235

236
  Subclasses must follow these rules:
237
    - implement ExpandNames
238
    - implement CheckPrereq (except when tasklets are used)
239
    - implement Exec (except when tasklets are used)
240
    - implement BuildHooksEnv
241
    - redefine HPATH and HTYPE
242
    - optionally redefine their run requirements:
243
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
244

245
  Note that all commands require root permissions.
246

247
  @ivar dry_run_result: the value (if any) that will be returned to the caller
248
      in dry-run mode (signalled by opcode dry_run parameter)
249
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
250
      they should get if not already defined, and types they must match
251

252
  """
253
  HPATH = None
254
  HTYPE = None
255
  _OP_PARAMS = []
256
  REQ_BGL = True
257

    
258
  def __init__(self, processor, op, context, rpc):
259
    """Constructor for LogicalUnit.
260

261
    This needs to be overridden in derived classes in order to check op
262
    validity.
263

264
    """
265
    self.proc = processor
266
    self.op = op
267
    self.cfg = context.cfg
268
    self.context = context
269
    self.rpc = rpc
270
    # Dicts used to declare locking needs to mcpu
271
    self.needed_locks = None
272
    self.acquired_locks = {}
273
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
274
    self.add_locks = {}
275
    self.remove_locks = {}
276
    # Used to force good behavior when calling helper functions
277
    self.recalculate_locks = {}
278
    self.__ssh = None
279
    # logging
280
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
281
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
282
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
283
    # support for dry-run
284
    self.dry_run_result = None
285
    # support for generic debug attribute
286
    if (not hasattr(self.op, "debug_level") or
287
        not isinstance(self.op.debug_level, int)):
288
      self.op.debug_level = 0
289

    
290
    # Tasklets
291
    self.tasklets = None
292

    
293
    # The new kind-of-type-system
294
    op_id = self.op.OP_ID
295
    for attr_name, aval, test in self._OP_PARAMS:
296
      if not hasattr(op, attr_name):
297
        if aval == _NoDefault:
298
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
299
                                     (op_id, attr_name), errors.ECODE_INVAL)
300
        else:
301
          if callable(aval):
302
            dval = aval()
303
          else:
304
            dval = aval
305
          setattr(self.op, attr_name, dval)
306
      attr_val = getattr(op, attr_name)
307
      if test == _NoType:
308
        # no tests here
309
        continue
310
      if not callable(test):
311
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
312
                                     " given type is not a proper type (%s)" %
313
                                     (op_id, attr_name, test))
314
      if not test(attr_val):
315
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
316
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
317
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
318
                                   (op_id, attr_name), errors.ECODE_INVAL)
319

    
320
    self.CheckArguments()
321

    
322
  def __GetSSH(self):
323
    """Returns the SshRunner object
324

325
    """
326
    if not self.__ssh:
327
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
328
    return self.__ssh
329

    
330
  ssh = property(fget=__GetSSH)
331

    
332
  def CheckArguments(self):
333
    """Check syntactic validity for the opcode arguments.
334

335
    This method is for doing a simple syntactic check and ensure
336
    validity of opcode parameters, without any cluster-related
337
    checks. While the same can be accomplished in ExpandNames and/or
338
    CheckPrereq, doing these separate is better because:
339

340
      - ExpandNames is left as purely a lock-related function
341
      - CheckPrereq is run after we have acquired locks (and possible
342
        waited for them)
343

344
    The function is allowed to change the self.op attribute so that
345
    later methods can no longer worry about missing parameters.
346

347
    """
348
    pass
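  # Illustrative sketch of a typical override (self.op.nodes is a made-up
  # example parameter):
  #
  #   def CheckArguments(self):
  #     if not self.op.nodes:
  #       raise errors.OpPrereqError("Empty node list given",
  #                                  errors.ECODE_INVAL)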
349

    
350
  def ExpandNames(self):
351
    """Expand names for this LU.
352

353
    This method is called before starting to execute the opcode, and it should
354
    update all the parameters of the opcode to their canonical form (e.g. a
355
    short node name must be fully expanded after this method has successfully
356
    completed). This way locking, hooks, logging, etc. can work correctly.
357

358
    LUs which implement this method must also populate the self.needed_locks
359
    member, as a dict with lock levels as keys, and a list of needed lock names
360
    as values. Rules:
361

362
      - use an empty dict if you don't need any lock
363
      - if you don't need any lock at a particular level omit that level
364
      - don't put anything for the BGL level
365
      - if you want all locks at a level use locking.ALL_SET as a value
366

367
    If you need to share locks (rather than acquire them exclusively) at one
368
    level you can modify self.share_locks, setting a true value (usually 1) for
369
    that level. By default locks are not shared.
370

371
    This function can also define a list of tasklets, which then will be
372
    executed in order instead of the usual LU-level CheckPrereq and Exec
373
    functions, if those are not defined by the LU.
374

375
    Examples::
376

377
      # Acquire all nodes and one instance
378
      self.needed_locks = {
379
        locking.LEVEL_NODE: locking.ALL_SET,
380
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
381
      }
382
      # Acquire just two nodes
383
      self.needed_locks = {
384
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
385
      }
386
      # Acquire no locks
387
      self.needed_locks = {} # No, you can't leave it to the default value None
388

389
    """
390
    # The implementation of this method is mandatory only if the new LU is
391
    # concurrent, so that old LUs don't need to be changed all at the same
392
    # time.
393
    if self.REQ_BGL:
394
      self.needed_locks = {} # Exclusive LUs don't need locks.
395
    else:
396
      raise NotImplementedError
397

    
398
  def DeclareLocks(self, level):
399
    """Declare LU locking needs for a level
400

401
    While most LUs can just declare their locking needs at ExpandNames time,
402
    sometimes there's the need to calculate some locks after having acquired
403
    the ones before. This function is called just before acquiring locks at a
404
    particular level, but after acquiring the ones at lower levels, and permits
405
    such calculations. It can be used to modify self.needed_locks, and by
406
    default it does nothing.
407

408
    This function is only called if you have something already set in
409
    self.needed_locks for the level.
410

411
    @param level: Locking level which is going to be locked
412
    @type level: member of ganeti.locking.LEVELS
413

414
    """
415

    
416
  def CheckPrereq(self):
417
    """Check prerequisites for this LU.
418

419
    This method should check that the prerequisites for the execution
420
    of this LU are fulfilled. It can do internode communication, but
421
    it should be idempotent - no cluster or system changes are
422
    allowed.
423

424
    The method should raise errors.OpPrereqError in case something is
425
    not fulfilled. Its return value is ignored.
426

427
    This method should also update all the parameters of the opcode to
428
    their canonical form if it hasn't been done by ExpandNames before.
429

430
    """
431
    if self.tasklets is not None:
432
      for (idx, tl) in enumerate(self.tasklets):
433
        logging.debug("Checking prerequisites for tasklet %s/%s",
434
                      idx + 1, len(self.tasklets))
435
        tl.CheckPrereq()
436
    else:
437
      pass
438

    
439
  def Exec(self, feedback_fn):
440
    """Execute the LU.
441

442
    This method should implement the actual work. It should raise
443
    errors.OpExecError for failures that are somewhat dealt with in
444
    code, or expected.
445

446
    """
447
    if self.tasklets is not None:
448
      for (idx, tl) in enumerate(self.tasklets):
449
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
450
        tl.Exec(feedback_fn)
451
    else:
452
      raise NotImplementedError
453

    
454
  def BuildHooksEnv(self):
455
    """Build hooks environment for this LU.
456

457
    This method should return a three-element tuple consisting of: a dict
458
    containing the environment that will be used for running the
459
    specific hook for this LU, a list of node names on which the hook
460
    should run before the execution, and a list of node names on which
461
    the hook should run after the execution.
462

463
    The keys of the dict must not be prefixed with 'GANETI_' as this will
464
    be handled in the hooks runner. Also note additional keys will be
465
    added by the hooks runner. If the LU doesn't define any
466
    environment, an empty dict (and not None) should be returned.
467

468
    If there are no nodes to return, use an empty list (and not None).
469

470
    Note that if the HPATH for a LU class is None, this function will
471
    not be called.
472

473
    """
474
    raise NotImplementedError
475

    
476
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
477
    """Notify the LU about the results of its hooks.
478

479
    This method is called every time a hooks phase is executed, and notifies
480
    the Logical Unit about the hooks' result. The LU can then use it to alter
481
    its result based on the hooks.  By default the method does nothing and the
482
    previous result is passed back unchanged but any LU can define it if it
483
    wants to use the local cluster hook-scripts somehow.
484

485
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
486
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
487
    @param hook_results: the results of the multi-node hooks rpc call
488
    @param feedback_fn: function used to send feedback back to the caller
489
    @param lu_result: the previous Exec result this LU had, or None
490
        in the PRE phase
491
    @return: the new Exec result, based on the previous result
492
        and hook results
493

494
    """
495
    # API must be kept, thus we ignore the unused-argument and
496
    # could-be-a-function warnings
497
    # pylint: disable-msg=W0613,R0201
498
    return lu_result
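  # Illustrative sketch of an override: add feedback in the POST phase and
  # pass the Exec result through unchanged:
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       feedback_fn("Post hooks have run on all nodes")
  #     return lu_result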
499

    
500
  def _ExpandAndLockInstance(self):
501
    """Helper function to expand and lock an instance.
502

503
    Many LUs that work on an instance take its name in self.op.instance_name
504
    and need to expand it and then declare the expanded name for locking. This
505
    function does it, and then updates self.op.instance_name to the expanded
506
    name. It also initializes needed_locks as a dict, if this hasn't been done
507
    before.
508

509
    """
510
    if self.needed_locks is None:
511
      self.needed_locks = {}
512
    else:
513
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
514
        "_ExpandAndLockInstance called with instance-level locks set"
515
    self.op.instance_name = _ExpandInstanceName(self.cfg,
516
                                                self.op.instance_name)
517
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
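  # Illustrative usage from a subclass (a sketch, mirroring the pattern
  # documented in _LockInstancesNodes below):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()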
518

    
519
  def _LockInstancesNodes(self, primary_only=False):
520
    """Helper function to declare instances' nodes for locking.
521

522
    This function should be called after locking one or more instances to lock
523
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
524
    with all primary or secondary nodes for instances already locked and
525
    present in self.needed_locks[locking.LEVEL_INSTANCE].
526

527
    It should be called from DeclareLocks, and for safety only works if
528
    self.recalculate_locks[locking.LEVEL_NODE] is set.
529

530
    In the future it may grow parameters to just lock some instance's nodes, or
531
    to just lock primaries or secondary nodes, if needed.
532

533
    It should be called in DeclareLocks in a way similar to::
534

535
      if level == locking.LEVEL_NODE:
536
        self._LockInstancesNodes()
537

538
    @type primary_only: boolean
539
    @param primary_only: only lock primary nodes of locked instances
540

541
    """
542
    assert locking.LEVEL_NODE in self.recalculate_locks, \
543
      "_LockInstancesNodes helper function called with no nodes to recalculate"
544

    
545
    # TODO: check if we've really been called with the instance locks held
546

    
547
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
548
    # future we might want to have different behaviors depending on the value
549
    # of self.recalculate_locks[locking.LEVEL_NODE]
550
    wanted_nodes = []
551
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
552
      instance = self.context.cfg.GetInstanceInfo(instance_name)
553
      wanted_nodes.append(instance.primary_node)
554
      if not primary_only:
555
        wanted_nodes.extend(instance.secondary_nodes)
556

    
557
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
558
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
559
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
560
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
561

    
562
    del self.recalculate_locks[locking.LEVEL_NODE]
563

    
564

    
565
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
566
  """Simple LU which runs no hooks.
567

568
  This LU is intended as a parent for other LogicalUnits which will
569
  run no hooks, in order to reduce duplicate code.
570

571
  """
572
  HPATH = None
573
  HTYPE = None
574

    
575
  def BuildHooksEnv(self):
576
    """Empty BuildHooksEnv for NoHooksLu.
577

578
    This just raises an error.
579

580
    """
581
    assert False, "BuildHooksEnv called for NoHooksLUs"
582

    
583

    
584
class Tasklet:
585
  """Tasklet base class.
586

587
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
588
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
589
  tasklets know nothing about locks.
590

591
  Subclasses must follow these rules:
592
    - Implement CheckPrereq
593
    - Implement Exec
594

595
  """
596
  def __init__(self, lu):
597
    self.lu = lu
598

    
599
    # Shortcuts
600
    self.cfg = lu.cfg
601
    self.rpc = lu.rpc
602

    
603
  def CheckPrereq(self):
604
    """Check prerequisites for this tasklets.
605

606
    This method should check whether the prerequisites for the execution of
607
    this tasklet are fulfilled. It can do internode communication, but it
608
    should be idempotent - no cluster or system changes are allowed.
609

610
    The method should raise errors.OpPrereqError in case something is not
611
    fulfilled. Its return value is ignored.
612

613
    This method should also update all parameters to their canonical form if it
614
    hasn't been done before.
615

616
    """
617
    pass
618

    
619
  def Exec(self, feedback_fn):
620
    """Execute the tasklet.
621

622
    This method should implement the actual work. It should raise
623
    errors.OpExecError for failures that are somewhat dealt with in code, or
624
    expected.
625

626
    """
627
    raise NotImplementedError
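# Illustrative sketch (not from the original module) of a minimal tasklet and
# how an LU would register it; "_ExampleTasklet" is a made-up name:
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Processing instance %s" % self.instance.name)
#
#   # in some LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]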
628

    
629

    
630
def _GetWantedNodes(lu, nodes):
631
  """Returns list of checked and expanded node names.
632

633
  @type lu: L{LogicalUnit}
634
  @param lu: the logical unit on whose behalf we execute
635
  @type nodes: list
636
  @param nodes: list of node names or None for all nodes
637
  @rtype: list
638
  @return: the list of nodes, sorted
639
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
640

641
  """
642
  if not nodes:
643
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
644
      " non-empty list of nodes whose name is to be expanded.")
645

    
646
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
647
  return utils.NiceSort(wanted)
648

    
649

    
650
def _GetWantedInstances(lu, instances):
651
  """Returns list of checked and expanded instance names.
652

653
  @type lu: L{LogicalUnit}
654
  @param lu: the logical unit on whose behalf we execute
655
  @type instances: list
656
  @param instances: list of instance names or None for all instances
657
  @rtype: list
658
  @return: the list of instances, sorted
659
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
660
  @raise errors.OpPrereqError: if any of the passed instances is not found
661

662
  """
663
  if instances:
664
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
665
  else:
666
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
667
  return wanted
668

    
669

    
670
def _GetUpdatedParams(old_params, update_dict,
671
                      use_default=True, use_none=False):
672
  """Return the new version of a parameter dictionary.
673

674
  @type old_params: dict
675
  @param old_params: old parameters
676
  @type update_dict: dict
677
  @param update_dict: dict containing new parameter values, or
678
      constants.VALUE_DEFAULT to reset the parameter to its default
679
      value
680
  @type use_default: boolean
681
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682
      values as 'to be deleted' values
683
  @type use_none: boolean
684
  @param use_none: whether to recognise C{None} values as 'to be
685
      deleted' values
686
  @rtype: dict
687
  @return: the new parameter dictionary
688

689
  """
690
  params_copy = copy.deepcopy(old_params)
691
  for key, val in update_dict.iteritems():
692
    if ((use_default and val == constants.VALUE_DEFAULT) or
693
        (use_none and val is None)):
694
      try:
695
        del params_copy[key]
696
      except KeyError:
697
        pass
698
    else:
699
      params_copy[key] = val
700
  return params_copy
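# Worked example (illustrative values): resetting one parameter to its
# default while changing another:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"}
#   _GetUpdatedParams(old, upd)  # -> {"root_path": "/dev/vda1"}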
701

    
702

    
703
def _CheckOutputFields(static, dynamic, selected):
704
  """Checks whether all selected fields are valid.
705

706
  @type static: L{utils.FieldSet}
707
  @param static: static fields set
708
  @type dynamic: L{utils.FieldSet}
709
  @param dynamic: dynamic fields set
710

711
  """
712
  f = utils.FieldSet()
713
  f.Extend(static)
714
  f.Extend(dynamic)
715

    
716
  delta = f.NonMatching(selected)
717
  if delta:
718
    raise errors.OpPrereqError("Unknown output fields selected: %s"
719
                               % ",".join(delta), errors.ECODE_INVAL)
720

    
721

    
722
def _CheckGlobalHvParams(params):
723
  """Validates that given hypervisor params are not global ones.
724

725
  This will ensure that instances don't get customised versions of
726
  global params.
727

728
  """
729
  used_globals = constants.HVC_GLOBALS.intersection(params)
730
  if used_globals:
731
    msg = ("The following hypervisor parameters are global and cannot"
732
           " be customized at instance level, please modify them at"
733
           " cluster level: %s" % utils.CommaJoin(used_globals))
734
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
735

    
736

    
737
def _CheckNodeOnline(lu, node):
738
  """Ensure that a given node is online.
739

740
  @param lu: the LU on behalf of which we make the check
741
  @param node: the node to check
742
  @raise errors.OpPrereqError: if the node is offline
743

744
  """
745
  if lu.cfg.GetNodeInfo(node).offline:
746
    raise errors.OpPrereqError("Can't use offline node %s" % node,
747
                               errors.ECODE_INVAL)
748

    
749

    
750
def _CheckNodeNotDrained(lu, node):
751
  """Ensure that a given node is not drained.
752

753
  @param lu: the LU on behalf of which we make the check
754
  @param node: the node to check
755
  @raise errors.OpPrereqError: if the node is drained
756

757
  """
758
  if lu.cfg.GetNodeInfo(node).drained:
759
    raise errors.OpPrereqError("Can't use drained node %s" % node,
760
                               errors.ECODE_INVAL)
761

    
762

    
763
def _CheckNodeHasOS(lu, node, os_name, force_variant):
764
  """Ensure that a node supports a given OS.
765

766
  @param lu: the LU on behalf of which we make the check
767
  @param node: the node to check
768
  @param os_name: the OS to query about
769
  @param force_variant: whether to ignore variant errors
770
  @raise errors.OpPrereqError: if the node is not supporting the OS
771

772
  """
773
  result = lu.rpc.call_os_get(node, os_name)
774
  result.Raise("OS '%s' not in supported OS list for node %s" %
775
               (os_name, node),
776
               prereq=True, ecode=errors.ECODE_INVAL)
777
  if not force_variant:
778
    _CheckOSVariant(result.payload, os_name)
779

    
780

    
781
def _RequireFileStorage():
782
  """Checks that file storage is enabled.
783

784
  @raise errors.OpPrereqError: when file storage is disabled
785

786
  """
787
  if not constants.ENABLE_FILE_STORAGE:
788
    raise errors.OpPrereqError("File storage disabled at configure time",
789
                               errors.ECODE_INVAL)
790

    
791

    
792
def _CheckDiskTemplate(template):
793
  """Ensure a given disk template is valid.
794

795
  """
796
  if template not in constants.DISK_TEMPLATES:
797
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
798
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
799
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
800
  if template == constants.DT_FILE:
801
    _RequireFileStorage()
802
  return True
803

    
804

    
805
def _CheckStorageType(storage_type):
806
  """Ensure a given storage type is valid.
807

808
  """
809
  if storage_type not in constants.VALID_STORAGE_TYPES:
810
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
811
                               errors.ECODE_INVAL)
812
  if storage_type == constants.ST_FILE:
813
    _RequireFileStorage()
814
  return True
815

    
816

    
817
def _GetClusterDomainSecret():
818
  """Reads the cluster domain secret.
819

820
  """
821
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
822
                               strict=True)
823

    
824

    
825
def _CheckInstanceDown(lu, instance, reason):
826
  """Ensure that an instance is not running."""
827
  if instance.admin_up:
828
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
829
                               (instance.name, reason), errors.ECODE_STATE)
830

    
831
  pnode = instance.primary_node
832
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
833
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
834
              prereq=True, ecode=errors.ECODE_ENVIRON)
835

    
836
  if instance.name in ins_l.payload:
837
    raise errors.OpPrereqError("Instance %s is running, %s" %
838
                               (instance.name, reason), errors.ECODE_STATE)
839

    
840

    
841
def _ExpandItemName(fn, name, kind):
842
  """Expand an item name.
843

844
  @param fn: the function to use for expansion
845
  @param name: requested item name
846
  @param kind: text description ('Node' or 'Instance')
847
  @return: the resolved (full) name
848
  @raise errors.OpPrereqError: if the item is not found
849

850
  """
851
  full_name = fn(name)
852
  if full_name is None:
853
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
854
                               errors.ECODE_NOENT)
855
  return full_name
856

    
857

    
858
def _ExpandNodeName(cfg, name):
859
  """Wrapper over L{_ExpandItemName} for nodes."""
860
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
861

    
862

    
863
def _ExpandInstanceName(cfg, name):
864
  """Wrapper over L{_ExpandItemName} for instance."""
865
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
866

    
867

    
868
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
869
                          memory, vcpus, nics, disk_template, disks,
870
                          bep, hvp, hypervisor_name):
871
  """Builds instance related env variables for hooks
872

873
  This builds the hook environment from individual variables.
874

875
  @type name: string
876
  @param name: the name of the instance
877
  @type primary_node: string
878
  @param primary_node: the name of the instance's primary node
879
  @type secondary_nodes: list
880
  @param secondary_nodes: list of secondary nodes as strings
881
  @type os_type: string
882
  @param os_type: the name of the instance's OS
883
  @type status: boolean
884
  @param status: the should_run status of the instance
885
  @type memory: string
886
  @param memory: the memory size of the instance
887
  @type vcpus: string
888
  @param vcpus: the count of VCPUs the instance has
889
  @type nics: list
890
  @param nics: list of tuples (ip, mac, mode, link) representing
891
      the NICs the instance has
892
  @type disk_template: string
893
  @param disk_template: the disk template of the instance
894
  @type disks: list
895
  @param disks: the list of (size, mode) pairs
896
  @type bep: dict
897
  @param bep: the backend parameters for the instance
898
  @type hvp: dict
899
  @param hvp: the hypervisor parameters for the instance
900
  @type hypervisor_name: string
901
  @param hypervisor_name: the hypervisor for the instance
902
  @rtype: dict
903
  @return: the hook environment for this instance
904

905
  """
906
  if status:
907
    str_status = "up"
908
  else:
909
    str_status = "down"
910
  env = {
911
    "OP_TARGET": name,
912
    "INSTANCE_NAME": name,
913
    "INSTANCE_PRIMARY": primary_node,
914
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
915
    "INSTANCE_OS_TYPE": os_type,
916
    "INSTANCE_STATUS": str_status,
917
    "INSTANCE_MEMORY": memory,
918
    "INSTANCE_VCPUS": vcpus,
919
    "INSTANCE_DISK_TEMPLATE": disk_template,
920
    "INSTANCE_HYPERVISOR": hypervisor_name,
921
  }
922

    
923
  if nics:
924
    nic_count = len(nics)
925
    for idx, (ip, mac, mode, link) in enumerate(nics):
926
      if ip is None:
927
        ip = ""
928
      env["INSTANCE_NIC%d_IP" % idx] = ip
929
      env["INSTANCE_NIC%d_MAC" % idx] = mac
930
      env["INSTANCE_NIC%d_MODE" % idx] = mode
931
      env["INSTANCE_NIC%d_LINK" % idx] = link
932
      if mode == constants.NIC_MODE_BRIDGED:
933
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
934
  else:
935
    nic_count = 0
936

    
937
  env["INSTANCE_NIC_COUNT"] = nic_count
938

    
939
  if disks:
940
    disk_count = len(disks)
941
    for idx, (size, mode) in enumerate(disks):
942
      env["INSTANCE_DISK%d_SIZE" % idx] = size
943
      env["INSTANCE_DISK%d_MODE" % idx] = mode
944
  else:
945
    disk_count = 0
946

    
947
  env["INSTANCE_DISK_COUNT"] = disk_count
948

    
949
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
950
    for key, value in source.items():
951
      env["INSTANCE_%s_%s" % (kind, key)] = value
952

    
953
  return env
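# Illustrative result (made-up values) for an instance with one bridged NIC
# and one disk:
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     "INSTANCE_DISK0_MODE": "rw",
#     ...
#   }
#
# plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor
# parameter; the hooks runner later prefixes every key with "GANETI_".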
954

    
955

    
956
def _NICListToTuple(lu, nics):
957
  """Build a list of nic information tuples.
958

959
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
960
  value in LUQueryInstanceData.
961

962
  @type lu:  L{LogicalUnit}
963
  @param lu: the logical unit on whose behalf we execute
964
  @type nics: list of L{objects.NIC}
965
  @param nics: list of nics to convert to hooks tuples
966

967
  """
968
  hooks_nics = []
969
  cluster = lu.cfg.GetClusterInfo()
970
  for nic in nics:
971
    ip = nic.ip
972
    mac = nic.mac
973
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
974
    mode = filled_params[constants.NIC_MODE]
975
    link = filled_params[constants.NIC_LINK]
976
    hooks_nics.append((ip, mac, mode, link))
977
  return hooks_nics
978

    
979

    
980
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
981
  """Builds instance related env variables for hooks from an object.
982

983
  @type lu: L{LogicalUnit}
984
  @param lu: the logical unit on whose behalf we execute
985
  @type instance: L{objects.Instance}
986
  @param instance: the instance for which we should build the
987
      environment
988
  @type override: dict
989
  @param override: dictionary with key/values that will override
990
      our values
991
  @rtype: dict
992
  @return: the hook environment dictionary
993

994
  """
995
  cluster = lu.cfg.GetClusterInfo()
996
  bep = cluster.FillBE(instance)
997
  hvp = cluster.FillHV(instance)
998
  args = {
999
    'name': instance.name,
1000
    'primary_node': instance.primary_node,
1001
    'secondary_nodes': instance.secondary_nodes,
1002
    'os_type': instance.os,
1003
    'status': instance.admin_up,
1004
    'memory': bep[constants.BE_MEMORY],
1005
    'vcpus': bep[constants.BE_VCPUS],
1006
    'nics': _NICListToTuple(lu, instance.nics),
1007
    'disk_template': instance.disk_template,
1008
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1009
    'bep': bep,
1010
    'hvp': hvp,
1011
    'hypervisor_name': instance.hypervisor,
1012
  }
1013
  if override:
1014
    args.update(override)
1015
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1016

    
1017

    
1018
def _AdjustCandidatePool(lu, exceptions):
1019
  """Adjust the candidate pool after node operations.
1020

1021
  """
1022
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1023
  if mod_list:
1024
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1025
               utils.CommaJoin(node.name for node in mod_list))
1026
    for name in mod_list:
1027
      lu.context.ReaddNode(name)
1028
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1029
  if mc_now > mc_max:
1030
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1031
               (mc_now, mc_max))
1032

    
1033

    
1034
def _DecideSelfPromotion(lu, exceptions=None):
1035
  """Decide whether I should promote myself as a master candidate.
1036

1037
  """
1038
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1039
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1040
  # the new node will increase mc_max with one, so:
1041
  mc_should = min(mc_should + 1, cp_size)
1042
  return mc_now < mc_should
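# Worked example (made-up numbers): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 5, the new node
# bumps mc_should to min(5 + 1, 10) = 6; since 3 < 6, the node decides to
# promote itself.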
1043

    
1044

    
1045
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1046
  """Check that the brigdes needed by a list of nics exist.
1047

1048
  """
1049
  cluster = lu.cfg.GetClusterInfo()
1050
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1051
  brlist = [params[constants.NIC_LINK] for params in paramslist
1052
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1053
  if brlist:
1054
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1055
    result.Raise("Error checking bridges on destination node '%s'" %
1056
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1057

    
1058

    
1059
def _CheckInstanceBridgesExist(lu, instance, node=None):
1060
  """Check that the brigdes needed by an instance exist.
1061

1062
  """
1063
  if node is None:
1064
    node = instance.primary_node
1065
  _CheckNicsBridgesExist(lu, instance.nics, node)
1066

    
1067

    
1068
def _CheckOSVariant(os_obj, name):
1069
  """Check whether an OS name conforms to the os variants specification.
1070

1071
  @type os_obj: L{objects.OS}
1072
  @param os_obj: OS object to check
1073
  @type name: string
1074
  @param name: OS name passed by the user, to check for validity
1075

1076
  """
1077
  if not os_obj.supported_variants:
1078
    return
1079
  try:
1080
    variant = name.split("+", 1)[1]
1081
  except IndexError:
1082
    raise errors.OpPrereqError("OS name must include a variant",
1083
                               errors.ECODE_INVAL)
1084

    
1085
  if variant not in os_obj.supported_variants:
1086
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1087

    
1088

    
1089
def _GetNodeInstancesInner(cfg, fn):
1090
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1091

    
1092

    
1093
def _GetNodeInstances(cfg, node_name):
1094
  """Returns a list of all primary and secondary instances on a node.
1095

1096
  """
1097

    
1098
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1099

    
1100

    
1101
def _GetNodePrimaryInstances(cfg, node_name):
1102
  """Returns primary instances on a node.
1103

1104
  """
1105
  return _GetNodeInstancesInner(cfg,
1106
                                lambda inst: node_name == inst.primary_node)
1107

    
1108

    
1109
def _GetNodeSecondaryInstances(cfg, node_name):
1110
  """Returns secondary instances on a node.
1111

1112
  """
1113
  return _GetNodeInstancesInner(cfg,
1114
                                lambda inst: node_name in inst.secondary_nodes)
1115

    
1116

    
1117
def _GetStorageTypeArgs(cfg, storage_type):
1118
  """Returns the arguments for a storage type.
1119

1120
  """
1121
  # Special case for file storage
1122
  if storage_type == constants.ST_FILE:
1123
    # storage.FileStorage wants a list of storage directories
1124
    return [[cfg.GetFileStorageDir()]]
1125

    
1126
  return []
1127

    
1128

    
1129
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
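  """Returns the indices of an instance's disks reported as faulty.

  The disk status is queried from C{node_name} via the
  blockdev_getmirrorstatus RPC call; indices whose local disk (ldisk)
  status is L{constants.LDS_FAULTY} are returned.

  """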
1130
  faulty = []
1131

    
1132
  for dev in instance.disks:
1133
    cfg.SetDiskID(dev, node_name)
1134

    
1135
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1136
  result.Raise("Failed to get disk status from node %s" % node_name,
1137
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1138

    
1139
  for idx, bdev_status in enumerate(result.payload):
1140
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1141
      faulty.append(idx)
1142

    
1143
  return faulty
1144

    
1145

    
1146
class LUPostInitCluster(LogicalUnit):
1147
  """Logical unit for running hooks after cluster initialization.
1148

1149
  """
1150
  HPATH = "cluster-init"
1151
  HTYPE = constants.HTYPE_CLUSTER
1152

    
1153
  def BuildHooksEnv(self):
1154
    """Build hooks env.
1155

1156
    """
1157
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1158
    mn = self.cfg.GetMasterNode()
1159
    return env, [], [mn]
1160

    
1161
  def Exec(self, feedback_fn):
1162
    """Nothing to do.
1163

1164
    """
1165
    return True
1166

    
1167

    
1168
class LUDestroyCluster(LogicalUnit):
1169
  """Logical unit for destroying the cluster.
1170

1171
  """
1172
  HPATH = "cluster-destroy"
1173
  HTYPE = constants.HTYPE_CLUSTER
1174

    
1175
  def BuildHooksEnv(self):
1176
    """Build hooks env.
1177

1178
    """
1179
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1180
    return env, [], []
1181

    
1182
  def CheckPrereq(self):
1183
    """Check prerequisites.
1184

1185
    This checks whether the cluster is empty.
1186

1187
    Any errors are signaled by raising errors.OpPrereqError.
1188

1189
    """
1190
    master = self.cfg.GetMasterNode()
1191

    
1192
    nodelist = self.cfg.GetNodeList()
1193
    if len(nodelist) != 1 or nodelist[0] != master:
1194
      raise errors.OpPrereqError("There are still %d node(s) in"
1195
                                 " this cluster." % (len(nodelist) - 1),
1196
                                 errors.ECODE_INVAL)
1197
    instancelist = self.cfg.GetInstanceList()
1198
    if instancelist:
1199
      raise errors.OpPrereqError("There are still %d instance(s) in"
1200
                                 " this cluster." % len(instancelist),
1201
                                 errors.ECODE_INVAL)
1202

    
1203
  def Exec(self, feedback_fn):
1204
    """Destroys the cluster.
1205

1206
    """
1207
    master = self.cfg.GetMasterNode()
1208
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1209

    
1210
    # Run post hooks on master node before it's removed
1211
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1212
    try:
1213
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1214
    except:
1215
      # pylint: disable-msg=W0702
1216
      self.LogWarning("Errors occurred running hooks on %s" % master)
1217

    
1218
    result = self.rpc.call_node_stop_master(master, False)
1219
    result.Raise("Could not disable the master role")
1220

    
1221
    if modify_ssh_setup:
1222
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1223
      utils.CreateBackup(priv_key)
1224
      utils.CreateBackup(pub_key)
1225

    
1226
    return master
1227

    
1228

    
1229
def _VerifyCertificate(filename):
1230
  """Verifies a certificate for LUVerifyCluster.
1231

1232
  @type filename: string
1233
  @param filename: Path to PEM file
1234

1235
  """
1236
  try:
1237
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1238
                                           utils.ReadFile(filename))
1239
  except Exception, err: # pylint: disable-msg=W0703
1240
    return (LUVerifyCluster.ETYPE_ERROR,
1241
            "Failed to load X509 certificate %s: %s" % (filename, err))
1242

    
1243
  (errcode, msg) = \
1244
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1245
                                constants.SSL_CERT_EXPIRATION_ERROR)
1246

    
1247
  if msg:
1248
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1249
  else:
1250
    fnamemsg = None
1251

    
1252
  if errcode is None:
1253
    return (None, fnamemsg)
1254
  elif errcode == utils.CERT_WARNING:
1255
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1256
  elif errcode == utils.CERT_ERROR:
1257
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1258

    
1259
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1260

    
1261

    
1262
class LUVerifyCluster(LogicalUnit):
1263
  """Verifies the cluster status.
1264

1265
  """
1266
  HPATH = "cluster-verify"
1267
  HTYPE = constants.HTYPE_CLUSTER
1268
  _OP_PARAMS = [
1269
    ("skip_checks", _EmptyList,
1270
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1271
    ("verbose", False, _TBool),
1272
    ("error_codes", False, _TBool),
1273
    ("debug_simulate_errors", False, _TBool),
1274
    ]
1275
  REQ_BGL = False
1276

    
1277
  TCLUSTER = "cluster"
1278
  TNODE = "node"
1279
  TINSTANCE = "instance"
1280

    
1281
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1282
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1283
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1284
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1285
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1286
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1288
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1289
  ENODEDRBD = (TNODE, "ENODEDRBD")
1290
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1291
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1292
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1293
  ENODEHV = (TNODE, "ENODEHV")
1294
  ENODELVM = (TNODE, "ENODELVM")
1295
  ENODEN1 = (TNODE, "ENODEN1")
1296
  ENODENET = (TNODE, "ENODENET")
1297
  ENODEOS = (TNODE, "ENODEOS")
1298
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1299
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1300
  ENODERPC = (TNODE, "ENODERPC")
1301
  ENODESSH = (TNODE, "ENODESSH")
1302
  ENODEVERSION = (TNODE, "ENODEVERSION")
1303
  ENODESETUP = (TNODE, "ENODESETUP")
1304
  ENODETIME = (TNODE, "ENODETIME")
1305

    
1306
  ETYPE_FIELD = "code"
1307
  ETYPE_ERROR = "ERROR"
1308
  ETYPE_WARNING = "WARNING"
1309

    
1310
  class NodeImage(object):
1311
    """A class representing the logical and physical status of a node.
1312

1313
    @type name: string
1314
    @ivar name: the node name to which this object refers
1315
    @ivar volumes: a structure as returned from
1316
        L{ganeti.backend.GetVolumeList} (runtime)
1317
    @ivar instances: a list of running instances (runtime)
1318
    @ivar pinst: list of configured primary instances (config)
1319
    @ivar sinst: list of configured secondary instances (config)
1320
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1321
        of this node (config)
1322
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1323
    @ivar dfree: free disk, as reported by the node (runtime)
1324
    @ivar offline: the offline status (config)
1325
    @type rpc_fail: boolean
1326
    @ivar rpc_fail: whether the RPC verify call failed (overall,
1327
        not whether the individual keys were correct) (runtime)
1328
    @type lvm_fail: boolean
1329
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1330
    @type hyp_fail: boolean
1331
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1332
    @type ghost: boolean
1333
    @ivar ghost: whether this is a known node or not (config)
1334
    @type os_fail: boolean
1335
    @ivar os_fail: whether the RPC call didn't return valid OS data
1336
    @type oslist: list
1337
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1338

1339
    """
1340
    def __init__(self, offline=False, name=None):
1341
      self.name = name
1342
      self.volumes = {}
1343
      self.instances = []
1344
      self.pinst = []
1345
      self.sinst = []
1346
      self.sbp = {}
1347
      self.mfree = 0
1348
      self.dfree = 0
1349
      self.offline = offline
1350
      self.rpc_fail = False
1351
      self.lvm_fail = False
1352
      self.hyp_fail = False
1353
      self.ghost = False
1354
      self.os_fail = False
1355
      self.oslist = {}
1356

    
1357
  def ExpandNames(self):
1358
    self.needed_locks = {
1359
      locking.LEVEL_NODE: locking.ALL_SET,
1360
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1361
    }
1362
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1363

    
1364
  def _Error(self, ecode, item, msg, *args, **kwargs):
1365
    """Format an error message.
1366

1367
    Based on the opcode's error_codes parameter, either format a
1368
    parseable error code, or a simpler error string.
1369

1370
    This must be called only from Exec and functions called from Exec.
1371

1372
    """
1373
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1374
    itype, etxt = ecode
1375
    # first complete the msg
1376
    if args:
1377
      msg = msg % args
1378
    # then format the whole message
1379
    if self.op.error_codes:
1380
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1381
    else:
1382
      if item:
1383
        item = " " + item
1384
      else:
1385
        item = ""
1386
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1387
    # and finally report it via the feedback_fn
1388
    self._feedback_fn("  - %s" % msg)
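    # Example of the two formats (illustrative values) as they reach
    # feedback_fn, for ecode=self.ENODEVERSION and item="node1.example.com":
    #
    #   with op.error_codes:
    #     - ERROR:ENODEVERSION:node:node1.example.com:incompatible protocol...
    #   without:
    #     - ERROR: node node1.example.com: incompatible protocol...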
1389

    
1390
  def _ErrorIf(self, cond, *args, **kwargs):
1391
    """Log an error message if the passed condition is True.
1392

1393
    """
1394
    cond = bool(cond) or self.op.debug_simulate_errors
1395
    if cond:
1396
      self._Error(*args, **kwargs)
1397
    # do not mark the operation as failed for WARN cases only
1398
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1399
      self.bad = self.bad or cond
1400

    
1401
  def _VerifyNode(self, ninfo, nresult):
1402
    """Run multiple tests against a node.
1403

1404
    Test list:
1405

1406
      - compares ganeti version
1407
      - checks vg existence and size > 20G
1408
      - checks config file checksum
1409
      - checks ssh to other nodes
1410

1411
    @type ninfo: L{objects.Node}
1412
    @param ninfo: the node to check
1413
    @param nresult: the results from the node
1414
    @rtype: boolean
1415
    @return: whether overall this call was successful (and we can expect
1416
         reasonable values in the response)
1417

1418
    """
1419
    node = ninfo.name
1420
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1421

    
1422
    # main result, nresult should be a non-empty dict
1423
    test = not nresult or not isinstance(nresult, dict)
1424
    _ErrorIf(test, self.ENODERPC, node,
1425
                  "unable to verify node: no data returned")
1426
    if test:
1427
      return False
1428

    
1429
    # compares ganeti version
1430
    local_version = constants.PROTOCOL_VERSION
1431
    remote_version = nresult.get("version", None)
1432
    test = not (remote_version and
1433
                isinstance(remote_version, (list, tuple)) and
1434
                len(remote_version) == 2)
1435
    _ErrorIf(test, self.ENODERPC, node,
1436
             "connection to node returned invalid data")
1437
    if test:
1438
      return False
1439

    
1440
    test = local_version != remote_version[0]
1441
    _ErrorIf(test, self.ENODEVERSION, node,
1442
             "incompatible protocol versions: master %s,"
1443
             " node %s", local_version, remote_version[0])
1444
    if test:
1445
      return False
1446

    
1447
    # node seems compatible, we can actually try to look into its results
1448

    
1449
    # full package version
1450
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1451
                  self.ENODEVERSION, node,
1452
                  "software version mismatch: master %s, node %s",
1453
                  constants.RELEASE_VERSION, remote_version[1],
1454
                  code=self.ETYPE_WARNING)
1455

    
1456
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1457
    if isinstance(hyp_result, dict):
1458
      for hv_name, hv_result in hyp_result.iteritems():
1459
        test = hv_result is not None
1460
        _ErrorIf(test, self.ENODEHV, node,
1461
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1462

    
1463

    
1464
    test = nresult.get(constants.NV_NODESETUP,
1465
                           ["Missing NODESETUP results"])
1466
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1467
             "; ".join(test))
1468

    
1469
    return True
1470

    
1471
  def _VerifyNodeTime(self, ninfo, nresult,
1472
                      nvinfo_starttime, nvinfo_endtime):
1473
    """Check the node time.
1474

1475
    @type ninfo: L{objects.Node}
1476
    @param ninfo: the node to check
1477
    @param nresult: the remote results for the node
1478
    @param nvinfo_starttime: the start time of the RPC call
1479
    @param nvinfo_endtime: the end time of the RPC call
1480

1481
    """
1482
    node = ninfo.name
1483
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1484

    
1485
    ntime = nresult.get(constants.NV_TIME, None)
1486
    try:
1487
      ntime_merged = utils.MergeTime(ntime)
1488
    except (ValueError, TypeError):
1489
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1490
      return
1491

    
1492
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1493
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1494
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1495
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1496
    else:
1497
      ntime_diff = None
1498

    
1499
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1500
             "Node time diverges by at least %s from master node time",
1501
             ntime_diff)
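    # Worked example (illustrative numbers): assuming NODE_MAX_CLOCK_SKEW is
    # 150 seconds, a node whose merged time lies 200s before nvinfo_starttime
    # yields ntime_diff = "200.0s" and an ENODETIME error; any time within
    # the [start - skew, end + skew] window passes silently.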
1502

    
1503
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1504
    """Check the node time.
1505

1506
    @type ninfo: L{objects.Node}
1507
    @param ninfo: the node to check
1508
    @param nresult: the remote results for the node
1509
    @param vg_name: the configured VG name
1510

1511
    """
1512
    if vg_name is None:
1513
      return
1514

    
1515
    node = ninfo.name
1516
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517

    
1518
    # checks vg existence and size > 20G
1519
    vglist = nresult.get(constants.NV_VGLIST, None)
1520
    test = not vglist
1521
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1522
    if not test:
1523
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1524
                                            constants.MIN_VG_SIZE)
1525
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1526

    
1527
    # check pv names
1528
    pvlist = nresult.get(constants.NV_PVLIST, None)
1529
    test = pvlist is None
1530
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1531
    if not test:
1532
      # check that ':' is not present in PV names, since it's a
1533
      # special character for lvcreate (denotes the range of PEs to
1534
      # use on the PV)
1535
      for _, pvname, owner_vg in pvlist:
1536
        test = ":" in pvname
1537
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1538
                 " '%s' of VG '%s'", pvname, owner_vg)
1539

    
1540
  def _VerifyNodeNetwork(self, ninfo, nresult):
1541
    """Check the node time.
1542

1543
    @type ninfo: L{objects.Node}
1544
    @param ninfo: the node to check
1545
    @param nresult: the remote results for the node
1546

1547
    """
1548
    node = ninfo.name
1549
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1550

    
1551
    test = constants.NV_NODELIST not in nresult
1552
    _ErrorIf(test, self.ENODESSH, node,
1553
             "node hasn't returned node ssh connectivity data")
1554
    if not test:
1555
      if nresult[constants.NV_NODELIST]:
1556
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1557
          _ErrorIf(True, self.ENODESSH, node,
1558
                   "ssh communication with node '%s': %s", a_node, a_msg)
1559

    
1560
    test = constants.NV_NODENETTEST not in nresult
1561
    _ErrorIf(test, self.ENODENET, node,
1562
             "node hasn't returned node tcp connectivity data")
1563
    if not test:
1564
      if nresult[constants.NV_NODENETTEST]:
1565
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1566
        for anode in nlist:
1567
          _ErrorIf(True, self.ENODENET, node,
1568
                   "tcp communication with node '%s': %s",
1569
                   anode, nresult[constants.NV_NODENETTEST][anode])
1570

    
1571
    test = constants.NV_MASTERIP not in nresult
1572
    _ErrorIf(test, self.ENODENET, node,
1573
             "node hasn't returned node master IP reachability data")
1574
    if not test:
1575
      if not nresult[constants.NV_MASTERIP]:
1576
        if node == self.master_node:
1577
          msg = "the master node cannot reach the master IP (not configured?)"
1578
        else:
1579
          msg = "cannot reach the master IP"
1580
        _ErrorIf(True, self.ENODENET, node, msg)
1581

    
1582

    
1583
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1584
    """Verify an instance.
1585

1586
    This function checks to see if the required block devices are
1587
    available on the instance's nodes.
1588

1589
    """
1590
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1591
    node_current = instanceconfig.primary_node
1592

    
1593
    node_vol_should = {}
1594
    instanceconfig.MapLVsByNode(node_vol_should)
1595

    
1596
    for node in node_vol_should:
1597
      n_img = node_image[node]
1598
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1599
        # ignore missing volumes on offline or broken nodes
1600
        continue
1601
      for volume in node_vol_should[node]:
1602
        test = volume not in n_img.volumes
1603
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1604
                 "volume %s missing on node %s", volume, node)
1605

    
1606
    if instanceconfig.admin_up:
1607
      pri_img = node_image[node_current]
1608
      test = instance not in pri_img.instances and not pri_img.offline
1609
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1610
               "instance not running on its primary node %s",
1611
               node_current)
1612

    
1613
    for node, n_img in node_image.items():
1614
      if node != node_current:
1615
        test = instance in n_img.instances
1616
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1617
                 "instance should not run on node %s", node)
1618

    
1619
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1620
    """Verify if there are any unknown volumes in the cluster.
1621

1622
    The .os, .swap and backup volumes are ignored. All other volumes are
1623
    reported as unknown.
1624

1625
    """
1626
    for node, n_img in node_image.items():
1627
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1628
        # skip non-healthy nodes
1629
        continue
1630
      for volume in n_img.volumes:
1631
        test = (node not in node_vol_should or
1632
                volume not in node_vol_should[node])
1633
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1634
                      "volume %s is unknown", volume)
1635

    
1636
  def _VerifyOrphanInstances(self, instancelist, node_image):
1637
    """Verify the list of running instances.
1638

1639
    This checks what instances are running but unknown to the cluster.
1640

1641
    """
1642
    for node, n_img in node_image.items():
1643
      for o_inst in n_img.instances:
1644
        test = o_inst not in instancelist
1645
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1646
                      "instance %s on node %s should not exist", o_inst, node)
1647

    
1648
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1649
    """Verify N+1 Memory Resilience.
1650

1651
    Check that if one single node dies we can still start all the
1652
    instances it was primary for.
1653

1654
    """
1655
    for node, n_img in node_image.items():
1656
      # This code checks that every node which is now listed as
1657
      # secondary has enough memory to host all instances it is
1658
      # supposed to, should a single other node in the cluster fail.
1659
      # FIXME: not ready for failover to an arbitrary node
1660
      # FIXME: does not support file-backed instances
1661
      # WARNING: we currently take into account down instances as well
1662
      # as up ones, considering that even if they're down someone
1663
      # might want to start them even in the event of a node failure.
1664
      for prinode, instances in n_img.sbp.items():
1665
        needed_mem = 0
1666
        for instance in instances:
1667
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1668
          if bep[constants.BE_AUTO_BALANCE]:
1669
            needed_mem += bep[constants.BE_MEMORY]
1670
        test = n_img.mfree < needed_mem
1671
        self._ErrorIf(test, self.ENODEN1, node,
1672
                      "not enough memory on to accommodate"
1673
                      " failovers should peer node %s fail", prinode)
1674

    
1675
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1676
                       master_files):
1677
    """Verifies and computes the node required file checksums.
1678

1679
    @type ninfo: L{objects.Node}
1680
    @param ninfo: the node to check
1681
    @param nresult: the remote results for the node
1682
    @param file_list: required list of files
1683
    @param local_cksum: dictionary of local files and their checksums
1684
    @param master_files: list of files that only masters should have
1685

1686
    """
1687
    node = ninfo.name
1688
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1689

    
1690
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1691
    test = not isinstance(remote_cksum, dict)
1692
    _ErrorIf(test, self.ENODEFILECHECK, node,
1693
             "node hasn't returned file checksum data")
1694
    if test:
1695
      return
1696

    
1697
    for file_name in file_list:
1698
      node_is_mc = ninfo.master_candidate
1699
      must_have = (file_name not in master_files) or node_is_mc
1700
      # missing
1701
      test1 = file_name not in remote_cksum
1702
      # invalid checksum
1703
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1704
      # existing and good
1705
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1706
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1707
               "file '%s' missing", file_name)
1708
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1709
               "file '%s' has wrong checksum", file_name)
1710
      # not candidate and this is not a must-have file
1711
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1712
               "file '%s' should not exist on non master"
1713
               " candidates (and the file is outdated)", file_name)
1714
      # all good, except non-master/non-must have combination
1715
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1716
               "file '%s' should not exist"
1717
               " on non master candidates", file_name)
1718

    
1719
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1720
                      drbd_map):
1721
    """Verifies and the node DRBD status.
1722

1723
    @type ninfo: L{objects.Node}
1724
    @param ninfo: the node to check
1725
    @param nresult: the remote results for the node
1726
    @param instanceinfo: the dict of instances
1727
    @param drbd_helper: the configured DRBD usermode helper
1728
    @param drbd_map: the DRBD map as returned by
1729
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1730

1731
    """
1732
    node = ninfo.name
1733
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1734

    
1735
    if drbd_helper:
1736
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1737
      test = (helper_result is None)
1738
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1739
               "no drbd usermode helper returned")
1740
      if helper_result:
1741
        status, payload = helper_result
1742
        test = not status
1743
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1744
                 "drbd usermode helper check unsuccessful: %s", payload)
1745
        test = status and (payload != drbd_helper)
1746
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1747
                 "wrong drbd usermode helper: %s", payload)
1748

    
1749
    # compute the DRBD minors
1750
    node_drbd = {}
1751
    for minor, instance in drbd_map[node].items():
1752
      test = instance not in instanceinfo
1753
      _ErrorIf(test, self.ECLUSTERCFG, None,
1754
               "ghost instance '%s' in temporary DRBD map", instance)
1755
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1758
      if test:
1759
        node_drbd[minor] = (instance, False)
1760
      else:
1761
        instance = instanceinfo[instance]
1762
        node_drbd[minor] = (instance.name, instance.admin_up)
1763

    
1764
    # and now check them
1765
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1766
    test = not isinstance(used_minors, (tuple, list))
1767
    _ErrorIf(test, self.ENODEDRBD, node,
1768
             "cannot parse drbd status file: %s", str(used_minors))
1769
    if test:
1770
      # we cannot check drbd status
1771
      return
1772

    
1773
    for minor, (iname, must_exist) in node_drbd.items():
1774
      test = minor not in used_minors and must_exist
1775
      _ErrorIf(test, self.ENODEDRBD, node,
1776
               "drbd minor %d of instance %s is not active", minor, iname)
1777
    for minor in used_minors:
1778
      test = minor not in node_drbd
1779
      _ErrorIf(test, self.ENODEDRBD, node,
1780
               "unallocated drbd minor %d is in use", minor)
1781

    
1782
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1783
    """Builds the node OS structures.
1784

1785
    @type ninfo: L{objects.Node}
1786
    @param ninfo: the node to check
1787
    @param nresult: the remote results for the node
1788
    @param nimg: the node image object
1789

1790
    """
1791
    node = ninfo.name
1792
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1793

    
1794
    remote_os = nresult.get(constants.NV_OSLIST, None)
1795
    test = (not isinstance(remote_os, list) or
1796
            not compat.all(isinstance(v, list) and len(v) == 7
1797
                           for v in remote_os))
1798

    
1799
    _ErrorIf(test, self.ENODEOS, node,
1800
             "node hasn't returned valid OS data")
1801

    
1802
    nimg.os_fail = test
1803

    
1804
    if test:
1805
      return
1806

    
1807
    os_dict = {}
1808

    
1809
    for (name, os_path, status, diagnose,
1810
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1811

    
1812
      if name not in os_dict:
1813
        os_dict[name] = []
1814

    
1815
      # parameters is a list of lists instead of list of tuples due to
1816
      # JSON lacking a real tuple type, fix it:
1817
      parameters = [tuple(v) for v in parameters]
1818
      os_dict[name].append((os_path, status, diagnose,
1819
                            set(variants), set(parameters), set(api_ver)))
1820

    
1821
    nimg.oslist = os_dict
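  # Illustrative example of the grouping above: every NV_OSLIST entry is a
  # 7-element list, and entries sharing a name are collected under one key.
  # With made-up values:
  #
  #   remote_os = [
  #     ["lenny", "/srv/ganeti/os/lenny", True, "", ["default"], [], [15]],
  #     ["lenny", "/usr/share/ganeti/os/lenny", True, "", [], [], [10]],
  #   ]
  #
  # yields oslist["lenny"] with two tuples; everywhere the OS is used, the
  # first entry shadows the second (see the warning in _VerifyNodeOS below).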
1822

    
1823
  def _VerifyNodeOS(self, ninfo, nimg, base):
1824
    """Verifies the node OS list.
1825

1826
    @type ninfo: L{objects.Node}
1827
    @param ninfo: the node to check
1828
    @param nimg: the node image object
1829
    @param base: the 'template' node we match against (e.g. from the master)
1830

1831
    """
1832
    node = ninfo.name
1833
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1834

    
1835
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1836

    
1837
    for os_name, os_data in nimg.oslist.items():
1838
      assert os_data, "Empty OS status for OS %s?!" % os_name
1839
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1840
      _ErrorIf(not f_status, self.ENODEOS, node,
1841
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1842
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1843
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1844
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1845
      # this will be caught in the backend too
1846
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1847
               and not f_var, self.ENODEOS, node,
1848
               "OS %s with API at least %d does not declare any variant",
1849
               os_name, constants.OS_API_V15)
1850
      # comparisons with the 'base' image
1851
      test = os_name not in base.oslist
1852
      _ErrorIf(test, self.ENODEOS, node,
1853
               "Extra OS %s not present on reference node (%s)",
1854
               os_name, base.name)
1855
      if test:
1856
        continue
1857
      assert base.oslist[os_name], "Base node has empty OS status?"
1858
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1859
      if not b_status:
1860
        # base OS is invalid, skipping
1861
        continue
1862
      for kind, a, b in [("API version", f_api, b_api),
1863
                         ("variants list", f_var, b_var),
1864
                         ("parameters", f_param, b_param)]:
1865
        _ErrorIf(a != b, self.ENODEOS, node,
1866
                 "OS %s %s differs from reference node %s: %s vs. %s",
1867
                 kind, os_name, base.name,
1868
                 utils.CommaJoin(a), utils.CommaJoin(b))
1869

    
1870
    # check any missing OSes
1871
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1872
    _ErrorIf(missing, self.ENODEOS, node,
1873
             "OSes present on reference node %s but missing on this node: %s",
1874
             base.name, utils.CommaJoin(missing))
1875

    
1876
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1877
    """Verifies and updates the node volume data.
1878

1879
    This function will update a L{NodeImage}'s internal structures
1880
    with data from the remote call.
1881

1882
    @type ninfo: L{objects.Node}
1883
    @param ninfo: the node to check
1884
    @param nresult: the remote results for the node
1885
    @param nimg: the node image object
1886
    @param vg_name: the configured VG name
1887

1888
    """
1889
    node = ninfo.name
1890
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1891

    
1892
    nimg.lvm_fail = True
1893
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1894
    if vg_name is None:
1895
      pass
1896
    elif isinstance(lvdata, basestring):
1897
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1898
               utils.SafeEncode(lvdata))
1899
    elif not isinstance(lvdata, dict):
1900
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1901
    else:
1902
      nimg.volumes = lvdata
1903
      nimg.lvm_fail = False
1904

    
1905
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1906
    """Verifies and updates the node instance list.
1907

1908
    If the listing was successful, then updates this node's instance
1909
    list. Otherwise, it marks the RPC call as failed for the instance
1910
    list key.
1911

1912
    @type ninfo: L{objects.Node}
1913
    @param ninfo: the node to check
1914
    @param nresult: the remote results for the node
1915
    @param nimg: the node image object
1916

1917
    """
1918
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1919
    test = not isinstance(idata, list)
1920
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1921
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1922
    if test:
1923
      nimg.hyp_fail = True
1924
    else:
1925
      nimg.instances = idata
1926

    
1927
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1928
    """Verifies and computes a node information map
1929

1930
    @type ninfo: L{objects.Node}
1931
    @param ninfo: the node to check
1932
    @param nresult: the remote results for the node
1933
    @param nimg: the node image object
1934
    @param vg_name: the configured VG name
1935

1936
    """
1937
    node = ninfo.name
1938
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1939

    
1940
    # try to read free memory (from the hypervisor)
1941
    hv_info = nresult.get(constants.NV_HVINFO, None)
1942
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1943
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1944
    if not test:
1945
      try:
1946
        nimg.mfree = int(hv_info["memory_free"])
1947
      except (ValueError, TypeError):
1948
        _ErrorIf(True, self.ENODERPC, node,
1949
                 "node returned invalid nodeinfo, check hypervisor")
1950

    
1951
    # FIXME: devise a free space model for file based instances as well
1952
    if vg_name is not None:
1953
      test = (constants.NV_VGLIST not in nresult or
1954
              vg_name not in nresult[constants.NV_VGLIST])
1955
      _ErrorIf(test, self.ENODELVM, node,
1956
               "node didn't return data for the volume group '%s'"
1957
               " - it is either missing or broken", vg_name)
1958
      if not test:
1959
        try:
1960
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1961
        except (ValueError, TypeError):
1962
          _ErrorIf(True, self.ENODERPC, node,
1963
                   "node returned invalid LVM info, check LVM status")
1964

    
1965
  def BuildHooksEnv(self):
1966
    """Build hooks env.
1967

1968
    Cluster-Verify hooks are run in the post phase only; if they fail, their
    output is logged in the verify output and the verification fails.
1970

1971
    """
1972
    all_nodes = self.cfg.GetNodeList()
1973
    env = {
1974
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1975
      }
1976
    for node in self.cfg.GetAllNodesInfo().values():
1977
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1978

    
1979
    return env, [], all_nodes
1980

    
1981
  def Exec(self, feedback_fn):
1982
    """Verify integrity of cluster, performing various test on nodes.
1983

1984
    """
1985
    self.bad = False
1986
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1987
    verbose = self.op.verbose
1988
    self._feedback_fn = feedback_fn
1989
    feedback_fn("* Verifying global settings")
1990
    for msg in self.cfg.VerifyConfig():
1991
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1992

    
1993
    # Check the cluster certificates
1994
    for cert_filename in constants.ALL_CERT_FILES:
1995
      (errcode, msg) = _VerifyCertificate(cert_filename)
1996
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1997

    
1998
    vg_name = self.cfg.GetVGName()
1999
    drbd_helper = self.cfg.GetDRBDHelper()
2000
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2001
    cluster = self.cfg.GetClusterInfo()
2002
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2003
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2004
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2005
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2006
                        for iname in instancelist)
2007
    i_non_redundant = [] # Non redundant instances
2008
    i_non_a_balanced = [] # Non auto-balanced instances
2009
    n_offline = 0 # Count of offline nodes
2010
    n_drained = 0 # Count of nodes being drained
2011
    node_vol_should = {}
2012

    
2013
    # FIXME: verify OS list
2014
    # do local checksums
2015
    master_files = [constants.CLUSTER_CONF_FILE]
2016
    master_node = self.master_node = self.cfg.GetMasterNode()
2017
    master_ip = self.cfg.GetMasterIP()
2018

    
2019
    file_names = ssconf.SimpleStore().GetFileList()
2020
    file_names.extend(constants.ALL_CERT_FILES)
2021
    file_names.extend(master_files)
2022
    if cluster.modify_etc_hosts:
2023
      file_names.append(constants.ETC_HOSTS)
2024

    
2025
    local_checksums = utils.FingerprintFiles(file_names)
2026

    
2027
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2028
    node_verify_param = {
2029
      constants.NV_FILELIST: file_names,
2030
      constants.NV_NODELIST: [node.name for node in nodeinfo
2031
                              if not node.offline],
2032
      constants.NV_HYPERVISOR: hypervisors,
2033
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2034
                                  node.secondary_ip) for node in nodeinfo
2035
                                 if not node.offline],
2036
      constants.NV_INSTANCELIST: hypervisors,
2037
      constants.NV_VERSION: None,
2038
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2039
      constants.NV_NODESETUP: None,
2040
      constants.NV_TIME: None,
2041
      constants.NV_MASTERIP: (master_node, master_ip),
2042
      constants.NV_OSLIST: None,
2043
      }
2044

    
2045
    if vg_name is not None:
2046
      node_verify_param[constants.NV_VGLIST] = None
2047
      node_verify_param[constants.NV_LVLIST] = vg_name
2048
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2049
      node_verify_param[constants.NV_DRBDLIST] = None
2050

    
2051
    if drbd_helper:
2052
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2053

    
2054
    # Build our expected cluster state
2055
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2056
                                                 name=node.name))
2057
                      for node in nodeinfo)
2058

    
2059
    for instance in instancelist:
2060
      inst_config = instanceinfo[instance]
2061

    
2062
      for nname in inst_config.all_nodes:
2063
        if nname not in node_image:
2064
          # ghost node
2065
          gnode = self.NodeImage(name=nname)
2066
          gnode.ghost = True
2067
          node_image[nname] = gnode
2068

    
2069
      inst_config.MapLVsByNode(node_vol_should)
2070

    
2071
      pnode = inst_config.primary_node
2072
      node_image[pnode].pinst.append(instance)
2073

    
2074
      for snode in inst_config.secondary_nodes:
2075
        nimg = node_image[snode]
2076
        nimg.sinst.append(instance)
2077
        if pnode not in nimg.sbp:
2078
          nimg.sbp[pnode] = []
2079
        nimg.sbp[pnode].append(instance)
2080

    
2081
    # At this point, we have the in-memory data structures complete,
2082
    # except for the runtime information, which we'll gather next
2083

    
2084
    # Due to the way our RPC system works, exact response times cannot be
2085
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2086
    # time before and after executing the request, we can at least have a time
2087
    # window.
2088
    nvinfo_starttime = time.time()
2089
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2090
                                           self.cfg.GetClusterName())
2091
    nvinfo_endtime = time.time()
2092

    
2093
    all_drbd_map = self.cfg.ComputeDRBDMap()
2094

    
2095
    feedback_fn("* Verifying node status")
2096

    
2097
    refos_img = None
2098

    
2099
    for node_i in nodeinfo:
2100
      node = node_i.name
2101
      nimg = node_image[node]
2102

    
2103
      if node_i.offline:
2104
        if verbose:
2105
          feedback_fn("* Skipping offline node %s" % (node,))
2106
        n_offline += 1
2107
        continue
2108

    
2109
      if node == master_node:
2110
        ntype = "master"
2111
      elif node_i.master_candidate:
2112
        ntype = "master candidate"
2113
      elif node_i.drained:
2114
        ntype = "drained"
2115
        n_drained += 1
2116
      else:
2117
        ntype = "regular"
2118
      if verbose:
2119
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2120

    
2121
      msg = all_nvinfo[node].fail_msg
2122
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2123
      if msg:
2124
        nimg.rpc_fail = True
2125
        continue
2126

    
2127
      nresult = all_nvinfo[node].payload
2128

    
2129
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2130
      self._VerifyNodeNetwork(node_i, nresult)
2131
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2132
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2133
                            master_files)
2134
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2135
                           all_drbd_map)
2136
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2137

    
2138
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2139
      self._UpdateNodeInstances(node_i, nresult, nimg)
2140
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2141
      self._UpdateNodeOS(node_i, nresult, nimg)
2142
      if not nimg.os_fail:
2143
        if refos_img is None:
2144
          refos_img = nimg
2145
        self._VerifyNodeOS(node_i, nimg, refos_img)
2146

    
2147
    feedback_fn("* Verifying instance status")
2148
    for instance in instancelist:
2149
      if verbose:
2150
        feedback_fn("* Verifying instance %s" % instance)
2151
      inst_config = instanceinfo[instance]
2152
      self._VerifyInstance(instance, inst_config, node_image)
2153
      inst_nodes_offline = []
2154

    
2155
      pnode = inst_config.primary_node
2156
      pnode_img = node_image[pnode]
2157
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2158
               self.ENODERPC, pnode, "instance %s, connection to"
2159
               " primary node failed", instance)
2160

    
2161
      if pnode_img.offline:
2162
        inst_nodes_offline.append(pnode)
2163

    
2164
      # If the instance is non-redundant we cannot survive losing its primary
2165
      # node, so we are not N+1 compliant. On the other hand we have no disk
2166
      # templates with more than one secondary so that situation is not well
2167
      # supported either.
2168
      # FIXME: does not support file-backed instances
2169
      if not inst_config.secondary_nodes:
2170
        i_non_redundant.append(instance)
2171
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2172
               instance, "instance has multiple secondary nodes: %s",
2173
               utils.CommaJoin(inst_config.secondary_nodes),
2174
               code=self.ETYPE_WARNING)
2175

    
2176
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2177
        i_non_a_balanced.append(instance)
2178

    
2179
      for snode in inst_config.secondary_nodes:
2180
        s_img = node_image[snode]
2181
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2182
                 "instance %s, connection to secondary node failed", instance)
2183

    
2184
        if s_img.offline:
2185
          inst_nodes_offline.append(snode)
2186

    
2187
      # warn that the instance lives on offline nodes
2188
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2189
               "instance lives on offline node(s) %s",
2190
               utils.CommaJoin(inst_nodes_offline))
2191
      # ... or ghost nodes
2192
      for node in inst_config.all_nodes:
2193
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2194
                 "instance lives on ghost node %s", node)
2195

    
2196
    feedback_fn("* Verifying orphan volumes")
2197
    self._VerifyOrphanVolumes(node_vol_should, node_image)
2198

    
2199
    feedback_fn("* Verifying orphan instances")
2200
    self._VerifyOrphanInstances(instancelist, node_image)
2201

    
2202
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2203
      feedback_fn("* Verifying N+1 Memory redundancy")
2204
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2205

    
2206
    feedback_fn("* Other Notes")
2207
    if i_non_redundant:
2208
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2209
                  % len(i_non_redundant))
2210

    
2211
    if i_non_a_balanced:
2212
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2213
                  % len(i_non_a_balanced))
2214

    
2215
    if n_offline:
2216
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2217

    
2218
    if n_drained:
2219
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2220

    
2221
    return not self.bad
2222

    
2223
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2224
    """Analyze the post-hooks' result
2225

2226
    This method analyses the hook result, handles it, and sends some
2227
    nicely-formatted feedback back to the user.
2228

2229
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2230
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2231
    @param hooks_results: the results of the multi-node hooks rpc call
2232
    @param feedback_fn: function used to send feedback back to the caller
2233
    @param lu_result: previous Exec result
2234
    @return: the new Exec result, based on the previous result
2235
        and hook results
2236

2237
    """
2238
    # We only really run POST phase hooks, and are only interested in
2239
    # their results
2240
    if phase == constants.HOOKS_PHASE_POST:
2241
      # Used to change hooks' output to proper indentation
2242
      indent_re = re.compile('^', re.M)
2243
      feedback_fn("* Hooks Results")
2244
      assert hooks_results, "invalid result from hooks"
2245

    
2246
      for node_name in hooks_results:
2247
        res = hooks_results[node_name]
2248
        msg = res.fail_msg
2249
        test = msg and not res.offline
2250
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2251
                      "Communication failure in hooks execution: %s", msg)
2252
        if res.offline or msg:
2253
          # No need to investigate payload if node is offline or gave an error.
2254
          # manually override lu_result here, as _ErrorIf only
2255
          # overrides self.bad
2256
          lu_result = 1
2257
          continue
2258
        for script, hkr, output in res.payload:
2259
          test = hkr == constants.HKR_FAIL
2260
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2261
                        "Script %s failed, output:", script)
2262
          if test:
2263
            output = indent_re.sub('      ', output)
2264
            feedback_fn("%s" % output)
2265
            lu_result = 0
2266

    
2267
      return lu_result
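  # Side note on the indentation trick used above (illustrative): the
  # pattern re.compile('^', re.M) matches the start of every line, so a
  # single .sub() call prefixes each line of the hook output, e.g.:
  #
  #   indent_re = re.compile('^', re.M)
  #   indent_re.sub('      ', "line1\nline2")
  #   # -> "      line1\n      line2"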
2268

    
2269

    
2270
class LUVerifyDisks(NoHooksLU):
2271
  """Verifies the cluster disks status.
2272

2273
  """
2274
  REQ_BGL = False
2275

    
2276
  def ExpandNames(self):
2277
    self.needed_locks = {
2278
      locking.LEVEL_NODE: locking.ALL_SET,
2279
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2280
    }
2281
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2282

    
2283
  def Exec(self, feedback_fn):
2284
    """Verify integrity of cluster disks.
2285

2286
    @rtype: tuple of three items
2287
    @return: a tuple of (dict of node-to-node_error, list of instances
2288
        which need activate-disks, dict of instance: (node, volume) for
2289
        missing volumes)
2290

2291
    """
2292
    result = res_nodes, res_instances, res_missing = {}, [], {}
2293

    
2294
    vg_name = self.cfg.GetVGName()
2295
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2296
    instances = [self.cfg.GetInstanceInfo(name)
2297
                 for name in self.cfg.GetInstanceList()]
2298

    
2299
    nv_dict = {}
2300
    for inst in instances:
2301
      inst_lvs = {}
2302
      if (not inst.admin_up or
2303
          inst.disk_template not in constants.DTS_NET_MIRROR):
2304
        continue
2305
      inst.MapLVsByNode(inst_lvs)
2306
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2307
      for node, vol_list in inst_lvs.iteritems():
2308
        for vol in vol_list:
2309
          nv_dict[(node, vol)] = inst
2310

    
2311
    if not nv_dict:
2312
      return result
2313

    
2314
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2315

    
2316
    for node in nodes:
2317
      # node_volume
2318
      node_res = node_lvs[node]
2319
      if node_res.offline:
2320
        continue
2321
      msg = node_res.fail_msg
2322
      if msg:
2323
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2324
        res_nodes[node] = msg
2325
        continue
2326

    
2327
      lvs = node_res.payload
2328
      for lv_name, (_, _, lv_online) in lvs.items():
2329
        inst = nv_dict.pop((node, lv_name), None)
2330
        if (not lv_online and inst is not None
2331
            and inst.name not in res_instances):
2332
          res_instances.append(inst.name)
2333

    
2334
    # any leftover items in nv_dict are missing LVs, let's arrange the
2335
    # data better
2336
    for key, inst in nv_dict.iteritems():
2337
      if inst.name not in res_missing:
2338
        res_missing[inst.name] = []
2339
      res_missing[inst.name].append(key)
2340

    
2341
    return result
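  # Illustrative example of the inversion done above (values made up):
  # MapLVsByNode fills a structure like
  #
  #   inst_lvs = {"node1.example.com": ["xenvg/disk0", "xenvg/disk1"]}
  #
  # which is flattened to {(node, lv): inst} so that one .pop() per LV the
  # node actually reports leaves exactly the missing volumes in nv_dict:
  #
  #   for node, vols in inst_lvs.items():
  #     for vol in vols:
  #       nv_dict[(node, vol)] = inst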
2342

    
2343

    
2344
class LURepairDiskSizes(NoHooksLU):
2345
  """Verifies the cluster disks sizes.
2346

2347
  """
2348
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2349
  REQ_BGL = False
2350

    
2351
  def ExpandNames(self):
2352
    if self.op.instances:
2353
      self.wanted_names = []
2354
      for name in self.op.instances:
2355
        full_name = _ExpandInstanceName(self.cfg, name)
2356
        self.wanted_names.append(full_name)
2357
      self.needed_locks = {
2358
        locking.LEVEL_NODE: [],
2359
        locking.LEVEL_INSTANCE: self.wanted_names,
2360
        }
2361
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2362
    else:
2363
      self.wanted_names = None
2364
      self.needed_locks = {
2365
        locking.LEVEL_NODE: locking.ALL_SET,
2366
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2367
        }
2368
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2369

    
2370
  def DeclareLocks(self, level):
2371
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2372
      self._LockInstancesNodes(primary_only=True)
2373

    
2374
  def CheckPrereq(self):
2375
    """Check prerequisites.
2376

2377
    This only checks the optional instance list against the existing names.
2378

2379
    """
2380
    if self.wanted_names is None:
2381
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2382

    
2383
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2384
                             in self.wanted_names]
2385

    
2386
  def _EnsureChildSizes(self, disk):
2387
    """Ensure children of the disk have the needed disk size.
2388

2389
    This is valid mainly for DRBD8 and fixes an issue where the
2390
    children have a smaller disk size.
2391

2392
    @param disk: an L{ganeti.objects.Disk} object
2393

2394
    """
2395
    if disk.dev_type == constants.LD_DRBD8:
2396
      assert disk.children, "Empty children for DRBD8?"
2397
      fchild = disk.children[0]
2398
      mismatch = fchild.size < disk.size
2399
      if mismatch:
2400
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2401
                     fchild.size, disk.size)
2402
        fchild.size = disk.size
2403

    
2404
      # and we recurse on this child only, not on the metadev
2405
      return self._EnsureChildSizes(fchild) or mismatch
2406
    else:
2407
      return False
2408

    
2409
  def Exec(self, feedback_fn):
2410
    """Verify the size of cluster disks.
2411

2412
    """
2413
    # TODO: check child disks too
2414
    # TODO: check differences in size between primary/secondary nodes
2415
    per_node_disks = {}
2416
    for instance in self.wanted_instances:
2417
      pnode = instance.primary_node
2418
      if pnode not in per_node_disks:
2419
        per_node_disks[pnode] = []
2420
      for idx, disk in enumerate(instance.disks):
2421
        per_node_disks[pnode].append((instance, idx, disk))
2422

    
2423
    changed = []
2424
    for node, dskl in per_node_disks.items():
2425
      newl = [v[2].Copy() for v in dskl]
2426
      for dsk in newl:
2427
        self.cfg.SetDiskID(dsk, node)
2428
      result = self.rpc.call_blockdev_getsizes(node, newl)
2429
      if result.fail_msg:
2430
        self.LogWarning("Failure in blockdev_getsizes call to node"
2431
                        " %s, ignoring", node)
2432
        continue
2433
      if len(result.data) != len(dskl):
2434
        self.LogWarning("Invalid result from node %s, ignoring node results",
2435
                        node)
2436
        continue
2437
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2438
        if size is None:
2439
          self.LogWarning("Disk %d of instance %s did not return size"
2440
                          " information, ignoring", idx, instance.name)
2441
          continue
2442
        if not isinstance(size, (int, long)):
2443
          self.LogWarning("Disk %d of instance %s did not return valid"
2444
                          " size information, ignoring", idx, instance.name)
2445
          continue
2446
        size = size >> 20
2447
        if size != disk.size:
2448
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2449
                       " correcting: recorded %d, actual %d", idx,
2450
                       instance.name, disk.size, size)
2451
          disk.size = size
2452
          self.cfg.Update(instance, feedback_fn)
2453
          changed.append((instance.name, idx, size))
2454
        if self._EnsureChildSizes(disk):
2455
          self.cfg.Update(instance, feedback_fn)
2456
          changed.append((instance.name, idx, disk.size))
2457
    return changed
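  # Note on the "size >> 20" above (illustrative): blockdev_getsizes reports
  # bytes while disk.size is kept in MiB, so the shift divides by 2**20:
  #
  #   size_bytes = 10737418240     # made-up value, 10 GiB
  #   size_mib = size_bytes >> 20  # -> 10240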
2458

    
2459

    
2460
class LURenameCluster(LogicalUnit):
2461
  """Rename the cluster.
2462

2463
  """
2464
  HPATH = "cluster-rename"
2465
  HTYPE = constants.HTYPE_CLUSTER
2466
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2467

    
2468
  def BuildHooksEnv(self):
2469
    """Build hooks env.
2470

2471
    """
2472
    env = {
2473
      "OP_TARGET": self.cfg.GetClusterName(),
2474
      "NEW_NAME": self.op.name,
2475
      }
2476
    mn = self.cfg.GetMasterNode()
2477
    all_nodes = self.cfg.GetNodeList()
2478
    return env, [mn], all_nodes
2479

    
2480
  def CheckPrereq(self):
2481
    """Verify that the passed name is a valid one.
2482

2483
    """
2484
    hostname = utils.GetHostInfo(self.op.name)
2485

    
2486
    new_name = hostname.name
2487
    self.ip = new_ip = hostname.ip
2488
    old_name = self.cfg.GetClusterName()
2489
    old_ip = self.cfg.GetMasterIP()
2490
    if new_name == old_name and new_ip == old_ip:
2491
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2492
                                 " cluster has changed",
2493
                                 errors.ECODE_INVAL)
2494
    if new_ip != old_ip:
2495
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2496
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2497
                                   " reachable on the network. Aborting." %
2498
                                   new_ip, errors.ECODE_NOTUNIQUE)
2499

    
2500
    self.op.name = new_name
2501

    
2502
  def Exec(self, feedback_fn):
2503
    """Rename the cluster.
2504

2505
    """
2506
    clustername = self.op.name
2507
    ip = self.ip
2508

    
2509
    # shutdown the master IP
2510
    master = self.cfg.GetMasterNode()
2511
    result = self.rpc.call_node_stop_master(master, False)
2512
    result.Raise("Could not disable the master role")
2513

    
2514
    try:
2515
      cluster = self.cfg.GetClusterInfo()
2516
      cluster.cluster_name = clustername
2517
      cluster.master_ip = ip
2518
      self.cfg.Update(cluster, feedback_fn)
2519

    
2520
      # update the known hosts file
2521
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2522
      node_list = self.cfg.GetNodeList()
2523
      try:
2524
        node_list.remove(master)
2525
      except ValueError:
2526
        pass
2527
      result = self.rpc.call_upload_file(node_list,
2528
                                         constants.SSH_KNOWN_HOSTS_FILE)
2529
      for to_node, to_result in result.iteritems():
2530
        msg = to_result.fail_msg
2531
        if msg:
2532
          msg = ("Copy of file %s to node %s failed: %s" %
2533
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2534
          self.proc.LogWarning(msg)
2535

    
2536
    finally:
2537
      result = self.rpc.call_node_start_master(master, False, False)
2538
      msg = result.fail_msg
2539
      if msg:
2540
        self.LogWarning("Could not re-enable the master role on"
2541
                        " the master, please restart manually: %s", msg)
2542

    
2543

    
2544
class LUSetClusterParams(LogicalUnit):
2545
  """Change the parameters of the cluster.
2546

2547
  """
2548
  HPATH = "cluster-modify"
2549
  HTYPE = constants.HTYPE_CLUSTER
2550
  _OP_PARAMS = [
2551
    ("vg_name", None, _TMaybeString),
2552
    ("enabled_hypervisors", None,
2553
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2554
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2555
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2556
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2557
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2558
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2559
    ("uid_pool", None, _NoType),
2560
    ("add_uids", None, _NoType),
2561
    ("remove_uids", None, _NoType),
2562
    ("maintain_node_health", None, _TMaybeBool),
2563
    ("nicparams", None, _TOr(_TDict, _TNone)),
2564
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2565
    ("default_iallocator", None, _TMaybeString),
2566
    ]
2567
  REQ_BGL = False
2568

    
2569
  def CheckArguments(self):
2570
    """Check parameters
2571

2572
    """
2573
    if self.op.uid_pool:
2574
      uidpool.CheckUidPool(self.op.uid_pool)
2575

    
2576
    if self.op.add_uids:
2577
      uidpool.CheckUidPool(self.op.add_uids)
2578

    
2579
    if self.op.remove_uids:
2580
      uidpool.CheckUidPool(self.op.remove_uids)
2581

    
2582
  def ExpandNames(self):
2583
    # FIXME: in the future maybe other cluster params won't require checking on
2584
    # all nodes to be modified.
2585
    self.needed_locks = {
2586
      locking.LEVEL_NODE: locking.ALL_SET,
2587
    }
2588
    self.share_locks[locking.LEVEL_NODE] = 1
2589

    
2590
  def BuildHooksEnv(self):
2591
    """Build hooks env.
2592

2593
    """
2594
    env = {
2595
      "OP_TARGET": self.cfg.GetClusterName(),
2596
      "NEW_VG_NAME": self.op.vg_name,
2597
      }
2598
    mn = self.cfg.GetMasterNode()
2599
    return env, [mn], [mn]
2600

    
2601
  def CheckPrereq(self):
2602
    """Check prerequisites.
2603

2604
    This checks whether the given params don't conflict and
2605
    if the given volume group is valid.
2606

2607
    """
2608
    if self.op.vg_name is not None and not self.op.vg_name:
2609
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2610
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2611
                                   " instances exist", errors.ECODE_INVAL)
2612

    
2613
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2614
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2615
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2616
                                   " drbd-based instances exist",
2617
                                   errors.ECODE_INVAL)
2618

    
2619
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2620

    
2621
    # if vg_name not None, checks given volume group on all nodes
2622
    if self.op.vg_name:
2623
      vglist = self.rpc.call_vg_list(node_list)
2624
      for node in node_list:
2625
        msg = vglist[node].fail_msg
2626
        if msg:
2627
          # ignoring down node
2628
          self.LogWarning("Error while gathering data on node %s"
2629
                          " (ignoring node): %s", node, msg)
2630
          continue
2631
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2632
                                              self.op.vg_name,
2633
                                              constants.MIN_VG_SIZE)
2634
        if vgstatus:
2635
          raise errors.OpPrereqError("Error on node '%s': %s" %
2636
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2637

    
2638
    if self.op.drbd_helper:
2639
      # checks given drbd helper on all nodes
2640
      helpers = self.rpc.call_drbd_helper(node_list)
2641
      for node in node_list:
2642
        ninfo = self.cfg.GetNodeInfo(node)
2643
        if ninfo.offline:
2644
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2645
          continue
2646
        msg = helpers[node].fail_msg
2647
        if msg:
2648
          raise errors.OpPrereqError("Error checking drbd helper on node"
2649
                                     " '%s': %s" % (node, msg),
2650
                                     errors.ECODE_ENVIRON)
2651
        node_helper = helpers[node].payload
2652
        if node_helper != self.op.drbd_helper:
2653
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2654
                                     (node, node_helper), errors.ECODE_ENVIRON)
2655

    
2656
    self.cluster = cluster = self.cfg.GetClusterInfo()
2657
    # validate params changes
2658
    if self.op.beparams:
2659
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2660
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2661

    
2662
    if self.op.nicparams:
2663
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2664
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2665
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2666
      nic_errors = []
2667

    
2668
      # check all instances for consistency
2669
      for instance in self.cfg.GetAllInstancesInfo().values():
2670
        for nic_idx, nic in enumerate(instance.nics):
2671
          params_copy = copy.deepcopy(nic.nicparams)
2672
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2673

    
2674
          # check parameter syntax
2675
          try:
2676
            objects.NIC.CheckParameterSyntax(params_filled)
2677
          except errors.ConfigurationError, err:
2678
            nic_errors.append("Instance %s, nic/%d: %s" %
2679
                              (instance.name, nic_idx, err))
2680

    
2681
          # if we're moving instances to routed, check that they have an ip
2682
          target_mode = params_filled[constants.NIC_MODE]
2683
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2684
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2685
                              (instance.name, nic_idx))
2686
      if nic_errors:
2687
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2688
                                   "\n".join(nic_errors))
2689

    
2690
    # hypervisor list/parameters
2691
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2692
    if self.op.hvparams:
2693
      for hv_name, hv_dict in self.op.hvparams.items():
2694
        if hv_name not in self.new_hvparams:
2695
          self.new_hvparams[hv_name] = hv_dict
2696
        else:
2697
          self.new_hvparams[hv_name].update(hv_dict)
2698

    
2699
    # os hypervisor parameters
2700
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2701
    if self.op.os_hvp:
2702
      for os_name, hvs in self.op.os_hvp.items():
2703
        if os_name not in self.new_os_hvp:
2704
          self.new_os_hvp[os_name] = hvs
2705
        else:
2706
          for hv_name, hv_dict in hvs.items():
2707
            if hv_name not in self.new_os_hvp[os_name]:
2708
              self.new_os_hvp[os_name][hv_name] = hv_dict
2709
            else:
2710
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2711

    
2712
    # os parameters
2713
    self.new_osp = objects.FillDict(cluster.osparams, {})
2714
    if self.op.osparams:
2715
      for os_name, osp in self.op.osparams.items():
2716
        if os_name not in self.new_osp:
2717
          self.new_osp[os_name] = {}
2718

    
2719
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2720
                                                  use_none=True)
2721

    
2722
        if not self.new_osp[os_name]:
2723
          # we removed all parameters
2724
          del self.new_osp[os_name]
2725
        else:
2726
          # check the parameter validity (remote check)
2727
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2728
                         os_name, self.new_osp[os_name])
2729

    
2730
    # changes to the hypervisor list
2731
    if self.op.enabled_hypervisors is not None:
2732
      self.hv_list = self.op.enabled_hypervisors
2733
      for hv in self.hv_list:
2734
        # if the hypervisor doesn't already exist in the cluster
2735
        # hvparams, we initialize it to empty, and then (in both
2736
        # cases) we make sure to fill the defaults, as we might not
2737
        # have a complete defaults list if the hypervisor wasn't
2738
        # enabled before
2739
        if hv not in new_hvp:
2740
          new_hvp[hv] = {}
2741
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2742
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2743
    else:
2744
      self.hv_list = cluster.enabled_hypervisors
2745

    
2746
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2747
      # either the enabled list has changed, or the parameters have, validate
2748
      for hv_name, hv_params in self.new_hvparams.items():
2749
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2750
            (self.op.enabled_hypervisors and
2751
             hv_name in self.op.enabled_hypervisors)):
2752
          # either this is a new hypervisor, or its parameters have changed
2753
          hv_class = hypervisor.GetHypervisor(hv_name)
2754
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2755
          hv_class.CheckParameterSyntax(hv_params)
2756
          _CheckHVParams(self, node_list, hv_name, hv_params)
2757

    
2758
    if self.op.os_hvp:
2759
      # no need to check any newly-enabled hypervisors, since the
2760
      # defaults have already been checked in the above code-block
2761
      for os_name, os_hvp in self.new_os_hvp.items():
2762
        for hv_name, hv_params in os_hvp.items():
2763
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2764
          # we need to fill in the new os_hvp on top of the actual hv_p
2765
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2766
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2767
          hv_class = hypervisor.GetHypervisor(hv_name)
2768
          hv_class.CheckParameterSyntax(new_osp)
2769
          _CheckHVParams(self, node_list, hv_name, new_osp)
2770

    
2771
    if self.op.default_iallocator:
2772
      alloc_script = utils.FindFile(self.op.default_iallocator,
2773
                                    constants.IALLOCATOR_SEARCH_PATH,
2774
                                    os.path.isfile)
2775
      if alloc_script is None:
2776
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2777
                                   " specified" % self.op.default_iallocator,
2778
                                   errors.ECODE_INVAL)
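  # Illustrative sketch of the FillDict layering used throughout this LU:
  # defaults are copied first and then overridden by the more specific dict,
  # so cluster-wide hvparams sit below os_hvp entries, which sit below
  # per-instance overrides.  For flat dicts objects.FillDict behaves like
  # this minimal stand-in:
  #
  #   def _FillDict(defaults, custom):
  #     ret = defaults.copy()
  #     ret.update(custom)
  #     return ret
  #
  #   # _FillDict({"a": 1, "b": 2}, {"b": 3})  ->  {"a": 1, "b": 3}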
2779

    
2780
  def Exec(self, feedback_fn):
2781
    """Change the parameters of the cluster.
2782

2783
    """
2784
    if self.op.vg_name is not None:
2785
      new_volume = self.op.vg_name
2786
      if not new_volume:
2787
        new_volume = None
2788
      if new_volume != self.cfg.GetVGName():
2789
        self.cfg.SetVGName(new_volume)
2790
      else:
2791
        feedback_fn("Cluster LVM configuration already in desired"
2792
                    " state, not changing")
2793
    if self.op.drbd_helper is not None:
2794
      new_helper = self.op.drbd_helper
2795
      if not new_helper:
2796
        new_helper = None
2797
      if new_helper != self.cfg.GetDRBDHelper():
2798
        self.cfg.SetDRBDHelper(new_helper)
2799
      else:
2800
        feedback_fn("Cluster DRBD helper already in desired state,"
2801
                    " not changing")
2802
    if self.op.hvparams:
2803
      self.cluster.hvparams = self.new_hvparams
2804
    if self.op.os_hvp:
2805
      self.cluster.os_hvp = self.new_os_hvp
2806
    if self.op.enabled_hypervisors is not None:
2807
      self.cluster.hvparams = self.new_hvparams
2808
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2809
    if self.op.beparams:
2810
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2811
    if self.op.nicparams:
2812
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2813
    if self.op.osparams:
2814
      self.cluster.osparams = self.new_osp
2815

    
2816
    if self.op.candidate_pool_size is not None:
2817
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2818
      # we need to update the pool size here, otherwise the save will fail
2819
      _AdjustCandidatePool(self, [])
2820

    
2821
    if self.op.maintain_node_health is not None:
2822
      self.cluster.maintain_node_health = self.op.maintain_node_health
2823

    
2824
    if self.op.add_uids is not None:
2825
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2826

    
2827
    if self.op.remove_uids is not None:
2828
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2829

    
2830
    if self.op.uid_pool is not None:
2831
      self.cluster.uid_pool = self.op.uid_pool
2832

    
2833
    if self.op.default_iallocator is not None:
2834
      self.cluster.default_iallocator = self.op.default_iallocator
2835

    
2836
    self.cfg.Update(self.cluster, feedback_fn)
2837

    
2838

    
2839
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2840
  """Distribute additional files which are part of the cluster configuration.
2841

2842
  ConfigWriter takes care of distributing the config and ssconf files, but
2843
  there are more files which should be distributed to all nodes. This function
2844
  makes sure those are copied.
2845

2846
  @param lu: calling logical unit
2847
  @param additional_nodes: list of nodes not in the config to distribute to
2848

2849
  """
2850
  # 1. Gather target nodes
2851
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2852
  dist_nodes = lu.cfg.GetOnlineNodeList()
2853
  if additional_nodes is not None:
2854
    dist_nodes.extend(additional_nodes)
2855
  if myself.name in dist_nodes:
2856
    dist_nodes.remove(myself.name)
2857

    
2858
  # 2. Gather files to distribute
2859
  dist_files = set([constants.ETC_HOSTS,
2860
                    constants.SSH_KNOWN_HOSTS_FILE,
2861
                    constants.RAPI_CERT_FILE,
2862
                    constants.RAPI_USERS_FILE,
2863
                    constants.CONFD_HMAC_KEY,
2864
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2865
                   ])
2866

    
2867
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2868
  for hv_name in enabled_hypervisors:
2869
    hv_class = hypervisor.GetHypervisor(hv_name)
2870
    dist_files.update(hv_class.GetAncillaryFiles())
2871

    
2872
  # 3. Perform the files upload
2873
  for fname in dist_files:
2874
    if os.path.exists(fname):
2875
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2876
      for to_node, to_result in result.items():
2877
        msg = to_result.fail_msg
2878
        if msg:
2879
          msg = ("Copy of file %s to node %s failed: %s" %
2880
                 (fname, to_node, msg))
2881
          lu.proc.LogWarning(msg)
2882

    
2883

    
2884
class LURedistributeConfig(NoHooksLU):
2885
  """Force the redistribution of cluster configuration.
2886

2887
  This is a very simple LU.
2888

2889
  """
2890
  REQ_BGL = False
2891

    
2892
  def ExpandNames(self):
2893
    self.needed_locks = {
2894
      locking.LEVEL_NODE: locking.ALL_SET,
2895
    }
2896
    self.share_locks[locking.LEVEL_NODE] = 1
2897

    
2898
  def Exec(self, feedback_fn):
2899
    """Redistribute the configuration.
2900

2901
    """
2902
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2903
    _RedistributeAncillaryFiles(self)
2904

    
2905

    
2906
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2907
  """Sleep and poll for an instance's disk to sync.
2908

2909
  """
2910
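  # nothing to do if the instance has no disks, or if the caller passed
  # an explicitly empty disk list (note that "disks is not None and not
  # disks" binds more tightly than the leading "or")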
  if not instance.disks or disks is not None and not disks:
2911
    return True
2912

    
2913
  disks = _ExpandCheckDisks(instance, disks)
2914

    
2915
  if not oneshot:
2916
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2917

    
2918
  node = instance.primary_node
2919

    
2920
  for dev in disks:
2921
    lu.cfg.SetDiskID(dev, node)
2922

    
2923
  # TODO: Convert to utils.Retry
2924

    
2925
  retries = 0
2926
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2927
  while True:
2928
    max_time = 0
2929
    done = True
2930
    cumul_degraded = False
2931
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2932
    msg = rstats.fail_msg
2933
    if msg:
2934
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2935
      retries += 1
2936
      if retries >= 10:
2937
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2938
                                 " aborting." % node)
2939
      time.sleep(6)
2940
      continue
2941
    rstats = rstats.payload
2942
    retries = 0
2943
    for i, mstat in enumerate(rstats):
2944
      if mstat is None:
2945
        lu.LogWarning("Can't compute data for node %s/%s",
2946
                           node, disks[i].iv_name)
2947
        continue
2948

    
2949
      cumul_degraded = (cumul_degraded or
2950
                        (mstat.is_degraded and mstat.sync_percent is None))
2951
      if mstat.sync_percent is not None:
2952
        done = False
2953
        if mstat.estimated_time is not None:
2954
          rem_time = ("%s remaining (estimated)" %
2955
                      utils.FormatSeconds(mstat.estimated_time))
2956
          max_time = mstat.estimated_time
2957
        else:
2958
          rem_time = "no time estimate"
2959
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2960
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2961

    
2962
    # if we're done but degraded, let's do a few small retries, to
2963
    # make sure we see a stable and not transient situation; therefore
2964
    # we force restart of the loop
2965
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2966
      logging.info("Degraded disks found, %d retries left", degr_retries)
2967
      degr_retries -= 1
2968
      time.sleep(1)
2969
      continue
2970

    
2971
    if done or oneshot:
2972
      break
2973

    
2974
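    # sleep roughly as long as the reported time estimate, but never
    # more than a minute between polls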
    time.sleep(min(60, max_time))
2975

    
2976
  if done:
2977
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2978
  return not cumul_degraded
2979

    
2980

    
2981
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2982
  """Check that mirrors are not degraded.
2983

2984
  The ldisk parameter, if True, will change the test from the
2985
  is_degraded attribute (which represents overall non-ok status for
2986
  the device(s)) to the ldisk (representing the local storage status).
2987

2988
  """
2989
  lu.cfg.SetDiskID(dev, node)
2990

    
2991
  result = True
2992

    
2993
  if on_primary or dev.AssembleOnSecondary():
2994
    rstats = lu.rpc.call_blockdev_find(node, dev)
2995
    msg = rstats.fail_msg
2996
    if msg:
2997
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2998
      result = False
2999
    elif not rstats.payload:
3000
      lu.LogWarning("Can't find disk on node %s", node)
3001
      result = False
3002
    else:
3003
      if ldisk:
3004
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3005
      else:
3006
        result = result and not rstats.payload.is_degraded
3007

    
3008
  if dev.children:
3009
    for child in dev.children:
3010
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3011

    
3012
  return result
3013

    
3014

    
3015
class LUDiagnoseOS(NoHooksLU):
3016
  """Logical unit for OS diagnose/query.
3017

3018
  """
3019
  _OP_PARAMS = [
3020
    _POutputFields,
3021
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3022
    ]
3023
  REQ_BGL = False
3024
  _FIELDS_STATIC = utils.FieldSet()
3025
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3026
                                   "parameters", "api_versions")
3027

    
3028
  def CheckArguments(self):
3029
    if self.op.names:
3030
      raise errors.OpPrereqError("Selective OS query not supported",
3031
                                 errors.ECODE_INVAL)
3032

    
3033
    _CheckOutputFields(static=self._FIELDS_STATIC,
3034
                       dynamic=self._FIELDS_DYNAMIC,
3035
                       selected=self.op.output_fields)
3036

    
3037
  def ExpandNames(self):
3038
    # Lock all nodes, in shared mode
3039
    # Temporary removal of locks, should be reverted later
3040
    # TODO: reintroduce locks when they are lighter-weight
3041
    self.needed_locks = {}
3042
    #self.share_locks[locking.LEVEL_NODE] = 1
3043
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3044

    
3045
  @staticmethod
3046
  def _DiagnoseByOS(rlist):
3047
    """Remaps a per-node return list into an a per-os per-node dictionary
3048

3049
    @param rlist: a map with node names as keys and OS objects as values
3050

3051
    @rtype: dict
3052
    @return: a dictionary with osnames as keys and as value another
3053
        map, with nodes as keys and tuples of (path, status, diagnose,
3054
        variants, parameters, api_versions) as values, eg::
3055

3056
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3057
                                     (/srv/..., False, "invalid api")],
3058
                           "node2": [(/srv/..., True, "", [], [])]}
3059
          }
3060

3061
    """
3062
    all_os = {}
3063
    # we build here the list of nodes that didn't fail the RPC (at RPC
3064
    # level), so that nodes with a non-responding node daemon don't
3065
    # make all OSes invalid
3066
    good_nodes = [node_name for node_name in rlist
3067
                  if not rlist[node_name].fail_msg]
3068
    for node_name, nr in rlist.items():
3069
      if nr.fail_msg or not nr.payload:
3070
        continue
3071
      for (name, path, status, diagnose, variants,
3072
           params, api_versions) in nr.payload:
3073
        if name not in all_os:
3074
          # build a list of nodes for this os containing empty lists
3075
          # for each node in node_list
3076
          all_os[name] = {}
3077
          for nname in good_nodes:
3078
            all_os[name][nname] = []
3079
        # convert params from [name, help] to (name, help)
3080
        params = [tuple(v) for v in params]
3081
        all_os[name][node_name].append((path, status, diagnose,
3082
                                        variants, params, api_versions))
3083
    return all_os
3084

    
3085
  def Exec(self, feedback_fn):
3086
    """Compute the list of OSes.
3087

3088
    """
3089
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3090
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3091
    pol = self._DiagnoseByOS(node_data)
3092
    output = []
3093

    
3094
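    # an OS is reported as valid only if its first (preferred) entry is
    # present and valid on every node that answered the RPC; variants,
    # parameters and API versions are reduced to the intersection over
    # those nodes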
    for os_name, os_data in pol.items():
3095
      row = []
3096
      valid = True
3097
      (variants, params, api_versions) = null_state = (set(), set(), set())
3098
      for idx, osl in enumerate(os_data.values()):
3099
        valid = bool(valid and osl and osl[0][1])
3100
        if not valid:
3101
          (variants, params, api_versions) = null_state
3102
          break
3103
        node_variants, node_params, node_api = osl[0][3:6]
3104
        if idx == 0: # first entry
3105
          variants = set(node_variants)
3106
          params = set(node_params)
3107
          api_versions = set(node_api)
3108
        else: # keep consistency
3109
          variants.intersection_update(node_variants)
3110
          params.intersection_update(node_params)
3111
          api_versions.intersection_update(node_api)
3112

    
3113
      for field in self.op.output_fields:
3114
        if field == "name":
3115
          val = os_name
3116
        elif field == "valid":
3117
          val = valid
3118
        elif field == "node_status":
3119
          # this is just a copy of the dict
3120
          val = {}
3121
          for node_name, nos_list in os_data.items():
3122
            val[node_name] = nos_list
3123
        elif field == "variants":
3124
          val = list(variants)
3125
        elif field == "parameters":
3126
          val = list(params)
3127
        elif field == "api_versions":
3128
          val = list(api_versions)
3129
        else:
3130
          raise errors.ParameterError(field)
3131
        row.append(val)
3132
      output.append(row)
3133

    
3134
    return output
3135

    
3136

    
3137
class LURemoveNode(LogicalUnit):
3138
  """Logical unit for removing a node.
3139

3140
  """
3141
  HPATH = "node-remove"
3142
  HTYPE = constants.HTYPE_NODE
3143
  _OP_PARAMS = [
3144
    _PNodeName,
3145
    ]
3146

    
3147
  def BuildHooksEnv(self):
3148
    """Build hooks env.
3149

3150
    This doesn't run on the target node in the pre phase as a failed
3151
    node would then be impossible to remove.
3152

3153
    """
3154
    env = {
3155
      "OP_TARGET": self.op.node_name,
3156
      "NODE_NAME": self.op.node_name,
3157
      }
3158
    all_nodes = self.cfg.GetNodeList()
3159
    try:
3160
      all_nodes.remove(self.op.node_name)
3161
    except ValueError:
3162
      logging.warning("Node %s which is about to be removed not found"
3163
                      " in the all nodes list", self.op.node_name)
3164
    return env, all_nodes, all_nodes
3165

    
3166
  def CheckPrereq(self):
3167
    """Check prerequisites.
3168

3169
    This checks:
3170
     - the node exists in the configuration
3171
     - it does not have primary or secondary instances
3172
     - it's not the master
3173

3174
    Any errors are signaled by raising errors.OpPrereqError.
3175

3176
    """
3177
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3178
    node = self.cfg.GetNodeInfo(self.op.node_name)
3179
    assert node is not None
3180

    
3181
    instance_list = self.cfg.GetInstanceList()
3182

    
3183
    masternode = self.cfg.GetMasterNode()
3184
    if node.name == masternode:
3185
      raise errors.OpPrereqError("Node is the master node,"
3186
                                 " you need to failover first.",
3187
                                 errors.ECODE_INVAL)
3188

    
3189
    for instance_name in instance_list:
3190
      instance = self.cfg.GetInstanceInfo(instance_name)
3191
      if node.name in instance.all_nodes:
3192
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3193
                                   " please remove first." % instance_name,
3194
                                   errors.ECODE_INVAL)
3195
    self.op.node_name = node.name
3196
    self.node = node
3197

    
3198
  def Exec(self, feedback_fn):
3199
    """Removes the node from the cluster.
3200

3201
    """
3202
    node = self.node
3203
    logging.info("Stopping the node daemon and removing configs from node %s",
3204
                 node.name)
3205

    
3206
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3207

    
3208
    # Promote nodes to master candidate as needed
3209
    _AdjustCandidatePool(self, exceptions=[node.name])
3210
    self.context.RemoveNode(node.name)
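    # from here on the node is no longer in the configuration; the post
    # hooks and the node daemon shutdown below are best-effort and only
    # log warnings on failure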
3211

    
3212
    # Run post hooks on the node before it's removed
3213
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3214
    try:
3215
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3216
    except:
3217
      # pylint: disable-msg=W0702
3218
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3219

    
3220
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3221
    msg = result.fail_msg
3222
    if msg:
3223
      self.LogWarning("Errors encountered on the remote node while leaving"
3224
                      " the cluster: %s", msg)
3225

    
3226
    # Remove node from our /etc/hosts
3227
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3228
      # FIXME: this should be done via an rpc call to node daemon
3229
      utils.RemoveHostFromEtcHosts(node.name)
3230
      _RedistributeAncillaryFiles(self)
3231

    
3232

    
3233
class LUQueryNodes(NoHooksLU):
3234
  """Logical unit for querying nodes.
3235

3236
  """
3237
  # pylint: disable-msg=W0142
3238
  _OP_PARAMS = [
3239
    _POutputFields,
3240
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3241
    ("use_locking", False, _TBool),
3242
    ]
3243
  REQ_BGL = False
3244

    
3245
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3246
                    "master_candidate", "offline", "drained"]
3247

    
3248
  _FIELDS_DYNAMIC = utils.FieldSet(
3249
    "dtotal", "dfree",
3250
    "mtotal", "mnode", "mfree",
3251
    "bootid",
3252
    "ctotal", "cnodes", "csockets",
3253
    )
3254

    
3255
  _FIELDS_STATIC = utils.FieldSet(*[
3256
    "pinst_cnt", "sinst_cnt",
3257
    "pinst_list", "sinst_list",
3258
    "pip", "sip", "tags",
3259
    "master",
3260
    "role"] + _SIMPLE_FIELDS
3261
    )
3262

    
3263
  def CheckArguments(self):
3264
    _CheckOutputFields(static=self._FIELDS_STATIC,
3265
                       dynamic=self._FIELDS_DYNAMIC,
3266
                       selected=self.op.output_fields)
3267

    
3268
  def ExpandNames(self):
3269
    self.needed_locks = {}
3270
    self.share_locks[locking.LEVEL_NODE] = 1
3271

    
3272
    if self.op.names:
3273
      self.wanted = _GetWantedNodes(self, self.op.names)
3274
    else:
3275
      self.wanted = locking.ALL_SET
3276

    
3277
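    # a live query (RPC to the nodes) is only needed if at least one of
    # the requested fields is not purely static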
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3278
    self.do_locking = self.do_node_query and self.op.use_locking
3279
    if self.do_locking:
3280
      # if we don't request only static fields, we need to lock the nodes
3281
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3282

    
3283
  def Exec(self, feedback_fn):
3284
    """Computes the list of nodes and their attributes.
3285

3286
    """
3287
    all_info = self.cfg.GetAllNodesInfo()
3288
    if self.do_locking:
3289
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3290
    elif self.wanted != locking.ALL_SET:
3291
      nodenames = self.wanted
3292
      missing = set(nodenames).difference(all_info.keys())
3293
      if missing:
3294
        raise errors.OpExecError(
3295
          "Some nodes were removed before retrieving their data: %s" % missing)
3296
    else:
3297
      nodenames = all_info.keys()
3298

    
3299
    nodenames = utils.NiceSort(nodenames)
3300
    nodelist = [all_info[name] for name in nodenames]
3301

    
3302
    # begin data gathering
3303

    
3304
    if self.do_node_query:
3305
      live_data = {}
3306
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3307
                                          self.cfg.GetHypervisorType())
3308
      for name in nodenames:
3309
        nodeinfo = node_data[name]
3310
        if not nodeinfo.fail_msg and nodeinfo.payload:
3311
          nodeinfo = nodeinfo.payload
3312
          fn = utils.TryConvert
3313
          live_data[name] = {
3314
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3315
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3316
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3317
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3318
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3319
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3320
            "bootid": nodeinfo.get('bootid', None),
3321
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3322
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3323
            }
3324
        else:
3325
          live_data[name] = {}
3326
    else:
3327
      live_data = dict.fromkeys(nodenames, {})
3328

    
3329
    node_to_primary = dict([(name, set()) for name in nodenames])
3330
    node_to_secondary = dict([(name, set()) for name in nodenames])
3331

    
3332
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3333
                             "sinst_cnt", "sinst_list"))
3334
    if inst_fields & frozenset(self.op.output_fields):
3335
      inst_data = self.cfg.GetAllInstancesInfo()
3336

    
3337
      for inst in inst_data.values():
3338
        if inst.primary_node in node_to_primary:
3339
          node_to_primary[inst.primary_node].add(inst.name)
3340
        for secnode in inst.secondary_nodes:
3341
          if secnode in node_to_secondary:
3342
            node_to_secondary[secnode].add(inst.name)
3343

    
3344
    master_node = self.cfg.GetMasterNode()
3345

    
3346
    # end data gathering
3347

    
3348
    output = []
3349
    for node in nodelist:
3350
      node_output = []
3351
      for field in self.op.output_fields:
3352
        if field in self._SIMPLE_FIELDS:
3353
          val = getattr(node, field)
3354
        elif field == "pinst_list":
3355
          val = list(node_to_primary[node.name])
3356
        elif field == "sinst_list":
3357
          val = list(node_to_secondary[node.name])
3358
        elif field == "pinst_cnt":
3359
          val = len(node_to_primary[node.name])
3360
        elif field == "sinst_cnt":
3361
          val = len(node_to_secondary[node.name])
3362
        elif field == "pip":
3363
          val = node.primary_ip
3364
        elif field == "sip":
3365
          val = node.secondary_ip
3366
        elif field == "tags":
3367
          val = list(node.GetTags())
3368
        elif field == "master":
3369
          val = node.name == master_node
3370
        elif self._FIELDS_DYNAMIC.Matches(field):
3371
          val = live_data[node.name].get(field, None)
3372
        elif field == "role":
3373
          if node.name == master_node:
3374
            val = "M"
3375
          elif node.master_candidate:
3376
            val = "C"
3377
          elif node.drained:
3378
            val = "D"
3379
          elif node.offline:
3380
            val = "O"
3381
          else:
3382
            val = "R"
3383
        else:
3384
          raise errors.ParameterError(field)
3385
        node_output.append(val)
3386
      output.append(node_output)
3387

    
3388
    return output
3389

    
3390

    
3391
class LUQueryNodeVolumes(NoHooksLU):
3392
  """Logical unit for getting volumes on node(s).
3393

3394
  """
3395
  _OP_PARAMS = [
3396
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3397
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3398
    ]
3399
  REQ_BGL = False
3400
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3401
  _FIELDS_STATIC = utils.FieldSet("node")
3402

    
3403
  def CheckArguments(self):
3404
    _CheckOutputFields(static=self._FIELDS_STATIC,
3405
                       dynamic=self._FIELDS_DYNAMIC,
3406
                       selected=self.op.output_fields)
3407

    
3408
  def ExpandNames(self):
3409
    self.needed_locks = {}
3410
    self.share_locks[locking.LEVEL_NODE] = 1
3411
    if not self.op.nodes:
3412
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3413
    else:
3414
      self.needed_locks[locking.LEVEL_NODE] = \
3415
        _GetWantedNodes(self, self.op.nodes)
3416

    
3417
  def Exec(self, feedback_fn):
3418
    """Computes the list of nodes and their attributes.
3419

3420
    """
3421
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3422
    volumes = self.rpc.call_node_volumes(nodenames)
3423

    
3424
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3425
             in self.cfg.GetInstanceList()]
3426

    
3427
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3428

    
3429
    output = []
3430
    for node in nodenames:
3431
      nresult = volumes[node]
3432
      if nresult.offline:
3433
        continue
3434
      msg = nresult.fail_msg
3435
      if msg:
3436
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3437
        continue
3438

    
3439
      node_vols = nresult.payload[:]
3440
      node_vols.sort(key=lambda vol: vol['dev'])
3441

    
3442
      for vol in node_vols:
3443
        node_output = []
3444
        for field in self.op.output_fields:
3445
          if field == "node":
3446
            val = node
3447
          elif field == "phys":
3448
            val = vol['dev']
3449
          elif field == "vg":
3450
            val = vol['vg']
3451
          elif field == "name":
3452
            val = vol['name']
3453
          elif field == "size":
3454
            val = int(float(vol['size']))
3455
          elif field == "instance":
3456
            for inst in ilist:
3457
              if node not in lv_by_node[inst]:
3458
                continue
3459
              if vol['name'] in lv_by_node[inst][node]:
3460
                val = inst.name
3461
                break
3462
            else:
3463
              val = '-'
3464
          else:
3465
            raise errors.ParameterError(field)
3466
          node_output.append(str(val))
3467

    
3468
        output.append(node_output)
3469

    
3470
    return output
3471

    
3472

    
3473
class LUQueryNodeStorage(NoHooksLU):
3474
  """Logical unit for getting information on storage units on node(s).
3475

3476
  """
3477
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3478
  _OP_PARAMS = [
3479
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3480
    ("storage_type", _NoDefault, _CheckStorageType),
3481
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3482
    ("name", None, _TMaybeString),
3483
    ]
3484
  REQ_BGL = False
3485

    
3486
  def CheckArguments(self):
3487
    _CheckOutputFields(static=self._FIELDS_STATIC,
3488
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3489
                       selected=self.op.output_fields)
3490

    
3491
  def ExpandNames(self):
3492
    self.needed_locks = {}
3493
    self.share_locks[locking.LEVEL_NODE] = 1
3494

    
3495
    if self.op.nodes:
3496
      self.needed_locks[locking.LEVEL_NODE] = \
3497
        _GetWantedNodes(self, self.op.nodes)
3498
    else:
3499
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3500

    
3501
  def Exec(self, feedback_fn):
3502
    """Computes the list of nodes and their attributes.
3503

3504
    """
3505
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3506

    
3507
    # Always get name to sort by
3508
    if constants.SF_NAME in self.op.output_fields:
3509
      fields = self.op.output_fields[:]
3510
    else:
3511
      fields = [constants.SF_NAME] + self.op.output_fields
3512

    
3513
    # Never ask for node or type as it's only known to the LU
3514
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3515
      while extra in fields:
3516
        fields.remove(extra)
3517

    
3518
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3519
    name_idx = field_idx[constants.SF_NAME]
3520

    
3521
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3522
    data = self.rpc.call_storage_list(self.nodes,
3523
                                      self.op.storage_type, st_args,
3524
                                      self.op.name, fields)
3525

    
3526
    result = []
3527

    
3528
    for node in utils.NiceSort(self.nodes):
3529
      nresult = data[node]
3530
      if nresult.offline:
3531
        continue
3532

    
3533
      msg = nresult.fail_msg
3534
      if msg:
3535
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3536
        continue
3537

    
3538
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3539

    
3540
      for name in utils.NiceSort(rows.keys()):
3541
        row = rows[name]
3542

    
3543
        out = []
3544

    
3545
        for field in self.op.output_fields:
3546
          if field == constants.SF_NODE:
3547
            val = node
3548
          elif field == constants.SF_TYPE:
3549
            val = self.op.storage_type
3550
          elif field in field_idx:
3551
            val = row[field_idx[field]]
3552
          else:
3553
            raise errors.ParameterError(field)
3554

    
3555
          out.append(val)
3556

    
3557
        result.append(out)
3558

    
3559
    return result
3560

    
3561

    
3562
class LUModifyNodeStorage(NoHooksLU):
3563
  """Logical unit for modifying a storage volume on a node.
3564

3565
  """
3566
  _OP_PARAMS = [
3567
    _PNodeName,
3568
    ("storage_type", _NoDefault, _CheckStorageType),
3569
    ("name", _NoDefault, _TNonEmptyString),
3570
    ("changes", _NoDefault, _TDict),
3571
    ]
3572
  REQ_BGL = False
3573

    
3574
  def CheckArguments(self):
3575
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3576

    
3577
    storage_type = self.op.storage_type
3578

    
3579
    try:
3580
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3581
    except KeyError:
3582
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3583
                                 " modified" % storage_type,
3584
                                 errors.ECODE_INVAL)
3585

    
3586
    diff = set(self.op.changes.keys()) - modifiable
3587
    if diff:
3588
      raise errors.OpPrereqError("The following fields can not be modified for"
3589
                                 " storage units of type '%s': %r" %
3590
                                 (storage_type, list(diff)),
3591
                                 errors.ECODE_INVAL)
3592

    
3593
  def ExpandNames(self):
3594
    self.needed_locks = {
3595
      locking.LEVEL_NODE: self.op.node_name,
3596
      }
3597

    
3598
  def Exec(self, feedback_fn):
3599
    """Computes the list of nodes and their attributes.
3600

3601
    """
3602
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3603
    result = self.rpc.call_storage_modify(self.op.node_name,
3604
                                          self.op.storage_type, st_args,
3605
                                          self.op.name, self.op.changes)
3606
    result.Raise("Failed to modify storage unit '%s' on %s" %
3607
                 (self.op.name, self.op.node_name))
3608

    
3609

    
3610
class LUAddNode(LogicalUnit):
3611
  """Logical unit for adding node to the cluster.
3612

3613
  """
3614
  HPATH = "node-add"
3615
  HTYPE = constants.HTYPE_NODE
3616
  _OP_PARAMS = [
3617
    _PNodeName,
3618
    ("primary_ip", None, _NoType),
3619
    ("secondary_ip", None, _TMaybeString),
3620
    ("readd", False, _TBool),
3621
    ]
3622

    
3623
  def CheckArguments(self):
3624
    # validate/normalize the node name
3625
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3626

    
3627
  def BuildHooksEnv(self):
3628
    """Build hooks env.
3629

3630
    This will run on all nodes before, and on all nodes + the new node after.
3631

3632
    """
3633
    env = {
3634
      "OP_TARGET": self.op.node_name,
3635
      "NODE_NAME": self.op.node_name,
3636
      "NODE_PIP": self.op.primary_ip,
3637
      "NODE_SIP": self.op.secondary_ip,
3638
      }
3639
    nodes_0 = self.cfg.GetNodeList()
3640
    nodes_1 = nodes_0 + [self.op.node_name, ]
3641
    return env, nodes_0, nodes_1
3642

    
3643
  def CheckPrereq(self):
3644
    """Check prerequisites.
3645

3646
    This checks:
3647
     - the new node is not already in the config
3648
     - it is resolvable
3649
     - its parameters (single/dual homed) matches the cluster
3650

3651
    Any errors are signaled by raising errors.OpPrereqError.
3652

3653
    """
3654
    node_name = self.op.node_name
3655
    cfg = self.cfg
3656

    
3657
    dns_data = utils.GetHostInfo(node_name)
3658

    
3659
    node = dns_data.name
3660
    primary_ip = self.op.primary_ip = dns_data.ip
3661
    if self.op.secondary_ip is None:
3662
      self.op.secondary_ip = primary_ip
3663
    if not utils.IsValidIP4(self.op.secondary_ip):
3664
      raise errors.OpPrereqError("Invalid secondary IP given",
3665
                                 errors.ECODE_INVAL)
3666
    secondary_ip = self.op.secondary_ip
3667

    
3668
    node_list = cfg.GetNodeList()
3669
    if not self.op.readd and node in node_list:
3670
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3671
                                 node, errors.ECODE_EXISTS)
3672
    elif self.op.readd and node not in node_list:
3673
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3674
                                 errors.ECODE_NOENT)
3675

    
3676
    self.changed_primary_ip = False
3677

    
3678
    for existing_node_name in node_list:
3679
      existing_node = cfg.GetNodeInfo(existing_node_name)
3680

    
3681
      if self.op.readd and node == existing_node_name:
3682
        if existing_node.secondary_ip != secondary_ip:
3683
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3684
                                     " address configuration as before",
3685
                                     errors.ECODE_INVAL)
3686
        if existing_node.primary_ip != primary_ip:
3687
          self.changed_primary_ip = True
3688

    
3689
        continue
3690

    
3691
      if (existing_node.primary_ip == primary_ip or
3692
          existing_node.secondary_ip == primary_ip or
3693
          existing_node.primary_ip == secondary_ip or
3694
          existing_node.secondary_ip == secondary_ip):
3695
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3696
                                   " existing node %s" % existing_node.name,
3697
                                   errors.ECODE_NOTUNIQUE)
3698

    
3699
    # check that the type of the node (single versus dual homed) is the
3700
    # same as for the master
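    # ("single homed" here means the node uses the same address for its
    # primary and its secondary/replication network)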
3701
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3702
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3703
    newbie_singlehomed = secondary_ip == primary_ip
3704
    if master_singlehomed != newbie_singlehomed:
3705
      if master_singlehomed:
3706
        raise errors.OpPrereqError("The master has no private ip but the"
3707
                                   " new node has one",
3708
                                   errors.ECODE_INVAL)
3709
      else:
3710
        raise errors.OpPrereqError("The master has a private ip but the"
3711
                                   " new node doesn't have one",
3712
                                   errors.ECODE_INVAL)
3713

    
3714
    # checks reachability
3715
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3716
      raise errors.OpPrereqError("Node not reachable by ping",
3717
                                 errors.ECODE_ENVIRON)
3718

    
3719
    if not newbie_singlehomed:
3720
      # check reachability from my secondary ip to newbie's secondary ip
3721
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3722
                           source=myself.secondary_ip):
3723
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3724
                                   " based ping to noded port",
3725
                                   errors.ECODE_ENVIRON)
3726

    
3727
    if self.op.readd:
3728
      exceptions = [node]
3729
    else:
3730
      exceptions = []
3731

    
3732
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3733

    
3734
    if self.op.readd:
3735
      self.new_node = self.cfg.GetNodeInfo(node)
3736
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3737
    else:
3738
      self.new_node = objects.Node(name=node,
3739
                                   primary_ip=primary_ip,
3740
                                   secondary_ip=secondary_ip,
3741
                                   master_candidate=self.master_candidate,
3742
                                   offline=False, drained=False)
3743

    
3744
  def Exec(self, feedback_fn):
3745
    """Adds the new node to the cluster.
3746

3747
    """
3748
    new_node = self.new_node
3749
    node = new_node.name
3750

    
3751
    # for re-adds, reset the offline/drained/master-candidate flags;
3752
    # we need to reset here, otherwise offline would prevent RPC calls
3753
    # later in the procedure; this also means that if the re-add
3754
    # fails, we are left with a non-offlined, broken node
3755
    if self.op.readd:
3756
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3757
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3758
      # if we demote the node, we do cleanup later in the procedure
3759
      new_node.master_candidate = self.master_candidate
3760
      if self.changed_primary_ip:
3761
        new_node.primary_ip = self.op.primary_ip
3762

    
3763
    # notify the user about any possible mc promotion
3764
    if new_node.master_candidate:
3765
      self.LogInfo("Node will be a master candidate")
3766

    
3767
    # check connectivity
3768
    result = self.rpc.call_version([node])[node]
3769
    result.Raise("Can't get version information from node %s" % node)
3770
    if constants.PROTOCOL_VERSION == result.payload:
3771
      logging.info("Communication to node %s fine, sw version %s match",
3772
                   node, result.payload)
3773
    else:
3774
      raise errors.OpExecError("Version mismatch master version %s,"
3775
                               " node version %s" %
3776
                               (constants.PROTOCOL_VERSION, result.payload))
3777

    
3778
    # setup ssh on node
3779
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3780
      logging.info("Copy ssh key to node %s", node)
3781
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3782
      keyarray = []
3783
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3784
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3785
                  priv_key, pub_key]
3786

    
3787
      for i in keyfiles:
3788
        keyarray.append(utils.ReadFile(i))
3789

    
3790
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3791
                                      keyarray[2], keyarray[3], keyarray[4],
3792
                                      keyarray[5])
3793
      result.Raise("Cannot transfer ssh keys to the new node")
3794

    
3795
    # Add node to our /etc/hosts, and add key to known_hosts
3796
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3797
      # FIXME: this should be done via an rpc call to node daemon
3798
      utils.AddHostToEtcHosts(new_node.name)
3799

    
3800
    if new_node.secondary_ip != new_node.primary_ip:
3801
      result = self.rpc.call_node_has_ip_address(new_node.name,
3802
                                                 new_node.secondary_ip)
3803
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3804
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3805
      if not result.payload:
3806
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3807
                                 " you gave (%s). Please fix and re-run this"
3808
                                 " command." % new_node.secondary_ip)
3809

    
3810
    node_verify_list = [self.cfg.GetMasterNode()]
3811
    node_verify_param = {
3812
      constants.NV_NODELIST: [node],
3813
      # TODO: do a node-net-test as well?
3814
    }
3815

    
3816
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3817
                                       self.cfg.GetClusterName())
3818
    for verifier in node_verify_list:
3819
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3820
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3821
      if nl_payload:
3822
        for failed in nl_payload:
3823
          feedback_fn("ssh/hostname verification failed"
3824
                      " (checking from %s): %s" %
3825
                      (verifier, nl_payload[failed]))
3826
        raise errors.OpExecError("ssh/hostname verification failed.")
3827

    
3828
    if self.op.readd:
3829
      _RedistributeAncillaryFiles(self)
3830
      self.context.ReaddNode(new_node)
3831
      # make sure we redistribute the config
3832
      self.cfg.Update(new_node, feedback_fn)
3833
      # and make sure the new node will not have old files around
3834
      if not new_node.master_candidate:
3835
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3836
        msg = result.fail_msg
3837
        if msg:
3838
          self.LogWarning("Node failed to demote itself from master"
3839
                          " candidate status: %s" % msg)
3840
    else:
3841
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3842
      self.context.AddNode(new_node, self.proc.GetECId())
3843

    
3844

    
3845
class LUSetNodeParams(LogicalUnit):
3846
  """Modifies the parameters of a node.
3847

3848
  """
3849
  HPATH = "node-modify"
3850
  HTYPE = constants.HTYPE_NODE
3851
  _OP_PARAMS = [
3852
    _PNodeName,
3853
    ("master_candidate", None, _TMaybeBool),
3854
    ("offline", None, _TMaybeBool),
3855
    ("drained", None, _TMaybeBool),
3856
    ("auto_promote", False, _TBool),
3857
    _PForce,
3858
    ]
3859
  REQ_BGL = False
3860

    
3861
  def CheckArguments(self):
3862
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3863
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3864
    if all_mods.count(None) == 3:
3865
      raise errors.OpPrereqError("Please pass at least one modification",
3866
                                 errors.ECODE_INVAL)
3867
    if all_mods.count(True) > 1:
3868
      raise errors.OpPrereqError("Can't set the node into more than one"
3869
                                 " state at the same time",
3870
                                 errors.ECODE_INVAL)
3871

    
3872
    # Boolean value that tells us whether we're offlining or draining the node
3873
    self.offline_or_drain = (self.op.offline == True or
3874
                             self.op.drained == True)
3875
    self.deoffline_or_drain = (self.op.offline == False or
3876
                               self.op.drained == False)
3877
    self.might_demote = (self.op.master_candidate == False or
3878
                         self.offline_or_drain)
3879

    
3880
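    # demoting this node may leave the cluster short of master
    # candidates; with auto_promote we therefore lock all nodes so that
    # others can be promoted via _AdjustCandidatePool in Exec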
    self.lock_all = self.op.auto_promote and self.might_demote
3881

    
3882

    
3883
  def ExpandNames(self):
3884
    if self.lock_all:
3885
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3886
    else:
3887
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3888

    
3889
  def BuildHooksEnv(self):
3890
    """Build hooks env.
3891

3892
    This runs on the master node.
3893

3894
    """
3895
    env = {
3896
      "OP_TARGET": self.op.node_name,
3897
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3898
      "OFFLINE": str(self.op.offline),
3899
      "DRAINED": str(self.op.drained),
3900
      }
3901
    nl = [self.cfg.GetMasterNode(),
3902
          self.op.node_name]
3903
    return env, nl, nl
3904

    
3905
  def CheckPrereq(self):
3906
    """Check prerequisites.
3907

3908
    This only checks the instance list against the existing names.
3909

3910
    """
3911
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3912

    
3913
    if (self.op.master_candidate is not None or
3914
        self.op.drained is not None or
3915
        self.op.offline is not None):
3916
      # we can't change the master's node flags
3917
      if self.op.node_name == self.cfg.GetMasterNode():
3918
        raise errors.OpPrereqError("The master role can be changed"
3919
                                   " only via masterfailover",
3920
                                   errors.ECODE_INVAL)
3921

    
3922

    
3923
    if node.master_candidate and self.might_demote and not self.lock_all:
3924
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3925
      # check if after removing the current node, we're missing master
3926
      # candidates
3927
      (mc_remaining, mc_should, _) = \
3928
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3929
      if mc_remaining < mc_should:
3930
        raise errors.OpPrereqError("Not enough master candidates, please"
3931
                                   " pass auto_promote to allow promotion",
3932
                                   errors.ECODE_INVAL)
3933

    
3934
    if (self.op.master_candidate == True and
3935
        ((node.offline and not self.op.offline == False) or
3936
         (node.drained and not self.op.drained == False))):
3937
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3938
                                 " to master_candidate" % node.name,
3939
                                 errors.ECODE_INVAL)
3940

    
3941
    # If we're being de-offlined or un-drained, promote ourselves to
    # master candidate if needed
3942
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3943
        self.op.master_candidate == True and not node.master_candidate):
3944
      self.op.master_candidate = _DecideSelfPromotion(self)
3945
      if self.op.master_candidate:
3946
        self.LogInfo("Autopromoting node to master candidate")
3947

    
3948
    return
3949

    
3950
  def Exec(self, feedback_fn):
3951
    """Modifies a node.
3952

3953
    """
3954
    node = self.node
3955

    
3956
    result = []
3957
    changed_mc = False
3958

    
3959
    if self.op.offline is not None:
3960
      node.offline = self.op.offline
3961
      result.append(("offline", str(self.op.offline)))
3962
      if self.op.offline == True:
3963
        if node.master_candidate:
3964
          node.master_candidate = False
3965
          changed_mc = True
3966
          result.append(("master_candidate", "auto-demotion due to offline"))
3967
        if node.drained:
3968
          node.drained = False
3969
          result.append(("drained", "clear drained status due to offline"))
3970

    
3971
    if self.op.master_candidate is not None:
3972
      node.master_candidate = self.op.master_candidate
3973
      changed_mc = True
3974
      result.append(("master_candidate", str(self.op.master_candidate)))
3975
      if self.op.master_candidate == False:
3976
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3977
        msg = rrc.fail_msg
3978
        if msg:
3979
          self.LogWarning("Node failed to demote itself: %s" % msg)
3980

    
3981
    if self.op.drained is not None:
3982
      node.drained = self.op.drained
3983
      result.append(("drained", str(self.op.drained)))
3984
      if self.op.drained == True:
3985
        if node.master_candidate:
3986
          node.master_candidate = False
3987
          changed_mc = True
3988
          result.append(("master_candidate", "auto-demotion due to drain"))
3989
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3990
          msg = rrc.fail_msg
3991
          if msg:
3992
            self.LogWarning("Node failed to demote itself: %s" % msg)
3993
        if node.offline:
3994
          node.offline = False
3995
          result.append(("offline", "clear offline status due to drain"))
3996

    
3997
    # we locked all nodes, we adjust the CP before updating this node
3998
    if self.lock_all:
3999
      _AdjustCandidatePool(self, [node.name])
4000

    
4001
    # this will trigger configuration file update, if needed
4002
    self.cfg.Update(node, feedback_fn)
4003

    
4004
    # this will trigger job queue propagation or cleanup
4005
    if changed_mc:
4006
      self.context.ReaddNode(node)
4007

    
4008
    return result
4009

    
4010

    
4011
class LUPowercycleNode(NoHooksLU):
4012
  """Powercycles a node.
4013

4014
  """
4015
  _OP_PARAMS = [
4016
    _PNodeName,
4017
    _PForce,
4018
    ]
4019
  REQ_BGL = False
4020

    
4021
  def CheckArguments(self):
4022
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4023
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4024
      raise errors.OpPrereqError("The node is the master and the force"
4025
                                 " parameter was not set",
4026
                                 errors.ECODE_INVAL)
4027

    
4028
  def ExpandNames(self):
4029
    """Locking for PowercycleNode.
4030

4031
    This is a last-resort option and shouldn't block on other
4032
    jobs. Therefore, we grab no locks.
4033

4034
    """
4035
    self.needed_locks = {}
4036

    
4037
  def Exec(self, feedback_fn):
4038
    """Reboots a node.
4039

4040
    """
4041
    result = self.rpc.call_node_powercycle(self.op.node_name,
4042
                                           self.cfg.GetHypervisorType())
4043
    result.Raise("Failed to schedule the reboot")
4044
    return result.payload
4045

    
4046

    
4047
class LUQueryClusterInfo(NoHooksLU):
4048
  """Query cluster configuration.
4049

4050
  """
4051
  REQ_BGL = False
4052

    
4053
  def ExpandNames(self):
4054
    self.needed_locks = {}
4055

    
4056
  def Exec(self, feedback_fn):
4057
    """Return cluster config.
4058

4059
    """
4060
    cluster = self.cfg.GetClusterInfo()
4061
    os_hvp = {}
4062

    
4063
    # Filter just for enabled hypervisors
4064
    for os_name, hv_dict in cluster.os_hvp.items():
4065
      os_hvp[os_name] = {}
4066
      for hv_name, hv_params in hv_dict.items():
4067
        if hv_name in cluster.enabled_hypervisors:
4068
          os_hvp[os_name][hv_name] = hv_params
4069

    
4070
    result = {
4071
      "software_version": constants.RELEASE_VERSION,
4072
      "protocol_version": constants.PROTOCOL_VERSION,
4073
      "config_version": constants.CONFIG_VERSION,
4074
      "os_api_version": max(constants.OS_API_VERSIONS),
4075
      "export_version": constants.EXPORT_VERSION,
4076
      "architecture": (platform.architecture()[0], platform.machine()),
4077
      "name": cluster.cluster_name,
4078
      "master": cluster.master_node,
4079
      "default_hypervisor": cluster.enabled_hypervisors[0],
4080
      "enabled_hypervisors": cluster.enabled_hypervisors,
4081
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4082
                        for hypervisor_name in cluster.enabled_hypervisors]),
4083
      "os_hvp": os_hvp,
4084
      "beparams": cluster.beparams,
4085
      "osparams": cluster.osparams,
4086
      "nicparams": cluster.nicparams,
4087
      "candidate_pool_size": cluster.candidate_pool_size,
4088
      "master_netdev": cluster.master_netdev,
4089
      "volume_group_name": cluster.volume_group_name,
4090
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4091
      "file_storage_dir": cluster.file_storage_dir,
4092
      "maintain_node_health": cluster.maintain_node_health,
4093
      "ctime": cluster.ctime,
4094
      "mtime": cluster.mtime,
4095
      "uuid": cluster.uuid,
4096
      "tags": list(cluster.GetTags()),
4097
      "uid_pool": cluster.uid_pool,
4098
      "default_iallocator": cluster.default_iallocator,
4099
      }
4100

    
4101
    return result
4102

    
4103

    
4104
class LUQueryConfigValues(NoHooksLU):
4105
  """Return configuration values.
4106

4107
  """
4108
  _OP_PARAMS = [_POutputFields]
4109
  REQ_BGL = False
4110
  _FIELDS_DYNAMIC = utils.FieldSet()
4111
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4112
                                  "watcher_pause")
4113

    
4114
  def CheckArguments(self):
4115
    _CheckOutputFields(static=self._FIELDS_STATIC,
4116
                       dynamic=self._FIELDS_DYNAMIC,
4117
                       selected=self.op.output_fields)
4118

    
4119
  def ExpandNames(self):
4120
    self.needed_locks = {}
4121

    
4122
  def Exec(self, feedback_fn):
4123
    """Dump a representation of the cluster config to the standard output.
4124

4125
    """
4126
    values = []
4127
    for field in self.op.output_fields:
4128
      if field == "cluster_name":
4129
        entry = self.cfg.GetClusterName()
4130
      elif field == "master_node":
4131
        entry = self.cfg.GetMasterNode()
4132
      elif field == "drain_flag":
4133
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4134
      elif field == "watcher_pause":
4135
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4136
      else:
4137
        raise errors.ParameterError(field)
4138
      values.append(entry)
4139
    return values
4140

    
4141

    
4142
class LUActivateInstanceDisks(NoHooksLU):
4143
  """Bring up an instance's disks.
4144

4145
  """
4146
  _OP_PARAMS = [
4147
    _PInstanceName,
4148
    ("ignore_size", False, _TBool),
4149
    ]
4150
  REQ_BGL = False
4151

    
4152
  def ExpandNames(self):
4153
    self._ExpandAndLockInstance()
4154
    self.needed_locks[locking.LEVEL_NODE] = []
4155
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4156

    
4157
  def DeclareLocks(self, level):
4158
    if level == locking.LEVEL_NODE:
4159
      self._LockInstancesNodes()
4160

    
4161
  def CheckPrereq(self):
4162
    """Check prerequisites.
4163

4164
    This checks that the instance is in the cluster.
4165

4166
    """
4167
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4168
    assert self.instance is not None, \
4169
      "Cannot retrieve locked instance %s" % self.op.instance_name
4170
    _CheckNodeOnline(self, self.instance.primary_node)
4171

    
4172
  def Exec(self, feedback_fn):
4173
    """Activate the disks.
4174

4175
    """
4176
    disks_ok, disks_info = \
4177
              _AssembleInstanceDisks(self, self.instance,
4178
                                     ignore_size=self.op.ignore_size)
4179
    if not disks_ok:
4180
      raise errors.OpExecError("Cannot activate block devices")
4181

    
4182
    return disks_info
4183

    
4184

    
4185
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4186
                           ignore_size=False):
4187
  """Prepare the block devices for an instance.
4188

4189
  This sets up the block devices on all nodes.
4190

4191
  @type lu: L{LogicalUnit}
4192
  @param lu: the logical unit on whose behalf we execute
4193
  @type instance: L{objects.Instance}
4194
  @param instance: the instance for whose disks we assemble
4195
  @type disks: list of L{objects.Disk} or None
4196
  @param disks: which disks to assemble (or all, if None)
4197
  @type ignore_secondaries: boolean
4198
  @param ignore_secondaries: if true, errors on secondary nodes
4199
      won't result in an error return from the function
4200
  @type ignore_size: boolean
4201
  @param ignore_size: if true, the current known size of the disk
4202
      will not be used during the disk activation, useful for cases
4203
      when the size is wrong
4204
  @return: False if the operation failed, otherwise a list of
4205
      (host, instance_visible_name, node_visible_name)
4206
      with the mapping from node devices to instance devices
4207

4208
  """
4209
  device_info = []
4210
  disks_ok = True
4211
  iname = instance.name
4212
  disks = _ExpandCheckDisks(instance, disks)
4213

    
4214
  # With the two passes mechanism we try to reduce the window of
4215
  # opportunity for the race condition of switching DRBD to primary
4216
  # before handshaking occurred, but we do not eliminate it
4217

    
4218
  # The proper fix would be to wait (with some limits) until the
4219
  # connection has been made and drbd transitions from WFConnection
4220
  # into any other network-connected state (Connected, SyncTarget,
4221
  # SyncSource, etc.)
4222

    
4223
  # 1st pass, assemble on all nodes in secondary mode
4224
  for inst_disk in disks:
4225
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4226
      if ignore_size:
4227
        node_disk = node_disk.Copy()
4228
        node_disk.UnsetSize()
4229
      lu.cfg.SetDiskID(node_disk, node)
4230
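      # the last argument requests assembly in secondary (non-primary)
      # role; the primary node is re-assembled with is_primary=True in
      # the second pass below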
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4231
      msg = result.fail_msg
4232
      if msg:
4233
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4234
                           " (is_primary=False, pass=1): %s",
4235
                           inst_disk.iv_name, node, msg)
4236
        if not ignore_secondaries:
4237
          disks_ok = False
4238

    
4239
  # FIXME: race condition on drbd migration to primary
4240

    
4241
  # 2nd pass, do only the primary node
4242
  for inst_disk in disks:
4243
    dev_path = None
4244

    
4245
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4246
      if node != instance.primary_node:
4247
        continue
4248
      if ignore_size:
4249
        node_disk = node_disk.Copy()
4250
        node_disk.UnsetSize()
4251
      lu.cfg.SetDiskID(node_disk, node)
4252
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4253
      msg = result.fail_msg
4254
      if msg:
4255
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4256
                           " (is_primary=True, pass=2): %s",
4257
                           inst_disk.iv_name, node, msg)
4258
        disks_ok = False
4259
      else:
4260
        dev_path = result.payload
4261

    
4262
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4263

    
4264
  # leave the disks configured for the primary node
4265
  # this is a workaround that would be fixed better by
4266
  # improving the logical/physical id handling
4267
  for disk in disks:
4268
    lu.cfg.SetDiskID(disk, instance.primary_node)
4269

    
4270
  return disks_ok, device_info
4271

    
4272

    
4273
def _StartInstanceDisks(lu, instance, force):
4274
  """Start the disks of an instance.
4275

4276
  """
4277
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4278
                                           ignore_secondaries=force)
4279
  if not disks_ok:
4280
    _ShutdownInstanceDisks(lu, instance)
4281
    if force is not None and not force:
4282
      lu.proc.LogWarning("", hint="If the message above refers to a"
4283
                         " secondary node,"
4284
                         " you can retry the operation using '--force'.")
4285
    raise errors.OpExecError("Disk consistency error")
4286

    
4287

    
4288
class LUDeactivateInstanceDisks(NoHooksLU):
4289
  """Shutdown an instance's disks.
4290

4291
  """
4292
  _OP_PARAMS = [
4293
    _PInstanceName,
4294
    ]
4295
  REQ_BGL = False
4296

    
4297
  def ExpandNames(self):
4298
    self._ExpandAndLockInstance()
4299
    self.needed_locks[locking.LEVEL_NODE] = []
4300
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4301

    
4302
  def DeclareLocks(self, level):
4303
    if level == locking.LEVEL_NODE:
4304
      self._LockInstancesNodes()
4305

    
4306
  def CheckPrereq(self):
4307
    """Check prerequisites.
4308

4309
    This checks that the instance is in the cluster.
4310

4311
    """
4312
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4313
    assert self.instance is not None, \
4314
      "Cannot retrieve locked instance %s" % self.op.instance_name
4315

    
4316
  def Exec(self, feedback_fn):
4317
    """Deactivate the disks
4318

4319
    """
4320
    instance = self.instance
4321
    _SafeShutdownInstanceDisks(self, instance)
4322

    
4323

    
4324
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4325
  """Shutdown block devices of an instance.
4326

4327
  This function checks that the instance is not running before calling
4328
  _ShutdownInstanceDisks.
4329

4330
  """
4331
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4332
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4333

    
4334

    
4335
def _ExpandCheckDisks(instance, disks):
4336
  """Return the instance disks selected by the disks list
4337

4338
  @type disks: list of L{objects.Disk} or None
4339
  @param disks: selected disks
4340
  @rtype: list of L{objects.Disk}
4341
  @return: selected instance disks to act on
4342

4343
  """
4344
  if disks is None:
4345
    return instance.disks
4346
  else:
4347
    if not set(disks).issubset(instance.disks):
4348
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4349
                                   " target instance")
4350
    return disks
4351

    
4352

    
4353
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4354
  """Shutdown block devices of an instance.
4355

4356
  This does the shutdown on all nodes of the instance.
4357

4358
  If the ignore_primary is false, errors on the primary node are
4359
  ignored.
4360

4361
  """
4362
  all_result = True
4363
  disks = _ExpandCheckDisks(instance, disks)
4364

    
4365
  for disk in disks:
4366
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4367
      lu.cfg.SetDiskID(top_disk, node)
4368
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4369
      msg = result.fail_msg
4370
      if msg:
4371
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4372
                      disk.iv_name, node, msg)
4373
        if not ignore_primary or node != instance.primary_node:
4374
          all_result = False
4375
  return all_result
4376

    
4377

    
4378
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4379
  """Checks if a node has enough free memory.
4380

4381
  This function check if a given node has the needed amount of free
4382
  memory. In case the node has less memory or we cannot get the
4383
  information from the node, this function raise an OpPrereqError
4384
  exception.
4385

4386
  @type lu: C{LogicalUnit}
4387
  @param lu: a logical unit from which we get configuration data
4388
  @type node: C{str}
4389
  @param node: the node to check
4390
  @type reason: C{str}
4391
  @param reason: string to use in the error message
4392
  @type requested: C{int}
4393
  @param requested: the amount of memory in MiB to check for
4394
  @type hypervisor_name: C{str}
4395
  @param hypervisor_name: the hypervisor to ask for memory stats
4396
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4397
      we cannot check the node
4398

4399
  """
4400
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4401
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4402
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4403
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4404
  if not isinstance(free_mem, int):
4405
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4406
                               " was '%s'" % (node, free_mem),
4407
                               errors.ECODE_ENVIRON)
4408
  if requested > free_mem:
4409
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4410
                               " needed %s MiB, available %s MiB" %
4411
                               (node, reason, requested, free_mem),
4412
                               errors.ECODE_NORES)
4413

    
4414

    
4415
def _CheckNodesFreeDisk(lu, nodenames, requested):
4416
  """Checks if nodes have enough free disk space in the default VG.
4417

4418
  This function check if all given nodes have the needed amount of
4419
  free disk. In case any node has less disk or we cannot get the
4420
  information from the node, this function raise an OpPrereqError
4421
  exception.
4422

4423
  @type lu: C{LogicalUnit}
4424
  @param lu: a logical unit from which we get configuration data
4425
  @type nodenames: C{list}
4426
  @param nodenames: the list of node names to check
4427
  @type requested: C{int}
4428
  @param requested: the amount of disk in MiB to check for
4429
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4430
      we cannot check the node
4431

4432
  """
4433
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4434
                                   lu.cfg.GetHypervisorType())
4435
  for node in nodenames:
4436
    info = nodeinfo[node]
4437
    info.Raise("Cannot get current information from node %s" % node,
4438
               prereq=True, ecode=errors.ECODE_ENVIRON)
4439
    vg_free = info.payload.get("vg_free", None)
4440
    if not isinstance(vg_free, int):
4441
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4442
                                 " result was '%s'" % (node, vg_free),
4443
                                 errors.ECODE_ENVIRON)
4444
    if requested > vg_free:
4445
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4446
                                 " required %d MiB, available %d MiB" %
4447
                                 (node, requested, vg_free),
4448
                                 errors.ECODE_NORES)
4449

    
4450

    
4451
class LUStartupInstance(LogicalUnit):
4452
  """Starts an instance.
4453

4454
  """
4455
  HPATH = "instance-start"
4456
  HTYPE = constants.HTYPE_INSTANCE
4457
  _OP_PARAMS = [
4458
    _PInstanceName,
4459
    _PForce,
4460
    ("hvparams", _EmptyDict, _TDict),
4461
    ("beparams", _EmptyDict, _TDict),
4462
    ]
4463
  REQ_BGL = False
4464

    
4465
  def CheckArguments(self):
4466
    # extra beparams
4467
    if self.op.beparams:
4468
      # fill the beparams dict
4469
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4470

    
4471
  def ExpandNames(self):
4472
    self._ExpandAndLockInstance()
4473

    
4474
  def BuildHooksEnv(self):
4475
    """Build hooks env.
4476

4477
    This runs on master, primary and secondary nodes of the instance.
4478

4479
    """
4480
    env = {
4481
      "FORCE": self.op.force,
4482
      }
4483
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4484
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4485
    return env, nl, nl
4486

    
4487
  def CheckPrereq(self):
4488
    """Check prerequisites.
4489

4490
    This checks that the instance is in the cluster.
4491

4492
    """
4493
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4494
    assert self.instance is not None, \
4495
      "Cannot retrieve locked instance %s" % self.op.instance_name
4496

    
4497
    # extra hvparams
4498
    if self.op.hvparams:
4499
      # check hypervisor parameter syntax (locally)
4500
      cluster = self.cfg.GetClusterInfo()
4501
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4502
      filled_hvp = cluster.FillHV(instance)
4503
      filled_hvp.update(self.op.hvparams)
4504
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4505
      hv_type.CheckParameterSyntax(filled_hvp)
4506
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4507

    
4508
    _CheckNodeOnline(self, instance.primary_node)
4509

    
4510
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4511
    # check bridges existence
4512
    _CheckInstanceBridgesExist(self, instance)
4513

    
4514
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4515
                                              instance.name,
4516
                                              instance.hypervisor)
4517
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4518
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4519
    if not remote_info.payload: # not running already
4520
      _CheckNodeFreeMemory(self, instance.primary_node,
4521
                           "starting instance %s" % instance.name,
4522
                           bep[constants.BE_MEMORY], instance.hypervisor)
4523

    
4524
  def Exec(self, feedback_fn):
4525
    """Start the instance.
4526

4527
    """
4528
    instance = self.instance
4529
    force = self.op.force
4530

    
4531
    self.cfg.MarkInstanceUp(instance.name)
4532

    
4533
    node_current = instance.primary_node
4534

    
4535
    _StartInstanceDisks(self, instance, force)
4536

    
4537
    result = self.rpc.call_instance_start(node_current, instance,
4538
                                          self.op.hvparams, self.op.beparams)
4539
    msg = result.fail_msg
4540
    if msg:
4541
      _ShutdownInstanceDisks(self, instance)
4542
      raise errors.OpExecError("Could not start instance: %s" % msg)
4543

    
4544

    
4545
class LURebootInstance(LogicalUnit):
4546
  """Reboot an instance.
4547

4548
  """
4549
  HPATH = "instance-reboot"
4550
  HTYPE = constants.HTYPE_INSTANCE
4551
  _OP_PARAMS = [
4552
    _PInstanceName,
4553
    ("ignore_secondaries", False, _TBool),
4554
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4555
    _PShutdownTimeout,
4556
    ]
4557
  REQ_BGL = False
4558

    
4559
  def ExpandNames(self):
4560
    self._ExpandAndLockInstance()
4561

    
4562
  def BuildHooksEnv(self):
4563
    """Build hooks env.
4564

4565
    This runs on master, primary and secondary nodes of the instance.
4566

4567
    """
4568
    env = {
4569
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4570
      "REBOOT_TYPE": self.op.reboot_type,
4571
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4572
      }
4573
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4574
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4575
    return env, nl, nl
4576

    
4577
  def CheckPrereq(self):
4578
    """Check prerequisites.
4579

4580
    This checks that the instance is in the cluster.
4581

4582
    """
4583
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4584
    assert self.instance is not None, \
4585
      "Cannot retrieve locked instance %s" % self.op.instance_name
4586

    
4587
    _CheckNodeOnline(self, instance.primary_node)
4588

    
4589
    # check bridges existence
4590
    _CheckInstanceBridgesExist(self, instance)
4591

    
4592
  def Exec(self, feedback_fn):
4593
    """Reboot the instance.
4594

4595
    """
4596
    instance = self.instance
4597
    ignore_secondaries = self.op.ignore_secondaries
4598
    reboot_type = self.op.reboot_type
4599

    
4600
    node_current = instance.primary_node
4601

    
4602
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4603
                       constants.INSTANCE_REBOOT_HARD]:
4604
      for disk in instance.disks:
4605
        self.cfg.SetDiskID(disk, node_current)
4606
      result = self.rpc.call_instance_reboot(node_current, instance,
4607
                                             reboot_type,
4608
                                             self.op.shutdown_timeout)
4609
      result.Raise("Could not reboot instance")
4610
    else:
4611
      result = self.rpc.call_instance_shutdown(node_current, instance,
4612
                                               self.op.shutdown_timeout)
4613
      result.Raise("Could not shutdown instance for full reboot")
4614
      _ShutdownInstanceDisks(self, instance)
4615
      _StartInstanceDisks(self, instance, ignore_secondaries)
4616
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4617
      msg = result.fail_msg
4618
      if msg:
4619
        _ShutdownInstanceDisks(self, instance)
4620
        raise errors.OpExecError("Could not start instance for"
4621
                                 " full reboot: %s" % msg)
4622

    
4623
    self.cfg.MarkInstanceUp(instance.name)
4624

    
4625

    
4626
class LUShutdownInstance(LogicalUnit):
4627
  """Shutdown an instance.
4628

4629
  """
4630
  HPATH = "instance-stop"
4631
  HTYPE = constants.HTYPE_INSTANCE
4632
  _OP_PARAMS = [
4633
    _PInstanceName,
4634
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4635
    ]
4636
  REQ_BGL = False
4637

    
4638
  def ExpandNames(self):
4639
    self._ExpandAndLockInstance()
4640

    
4641
  def BuildHooksEnv(self):
4642
    """Build hooks env.
4643

4644
    This runs on master, primary and secondary nodes of the instance.
4645

4646
    """
4647
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4648
    env["TIMEOUT"] = self.op.timeout
4649
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4650
    return env, nl, nl
4651

    
4652
  def CheckPrereq(self):
4653
    """Check prerequisites.
4654

4655
    This checks that the instance is in the cluster.
4656

4657
    """
4658
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4659
    assert self.instance is not None, \
4660
      "Cannot retrieve locked instance %s" % self.op.instance_name
4661
    _CheckNodeOnline(self, self.instance.primary_node)
4662

    
4663
  def Exec(self, feedback_fn):
4664
    """Shutdown the instance.
4665

4666
    """
4667
    instance = self.instance
4668
    node_current = instance.primary_node
4669
    timeout = self.op.timeout
4670
    self.cfg.MarkInstanceDown(instance.name)
4671
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4672
    msg = result.fail_msg
4673
    if msg:
4674
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4675

    
4676
    _ShutdownInstanceDisks(self, instance)
4677

    
4678

    
4679
class LUReinstallInstance(LogicalUnit):
4680
  """Reinstall an instance.
4681

4682
  """
4683
  HPATH = "instance-reinstall"
4684
  HTYPE = constants.HTYPE_INSTANCE
4685
  _OP_PARAMS = [
4686
    _PInstanceName,
4687
    ("os_type", None, _TMaybeString),
4688
    ("force_variant", False, _TBool),
4689
    ]
4690
  REQ_BGL = False
4691

    
4692
  def ExpandNames(self):
4693
    self._ExpandAndLockInstance()
4694

    
4695
  def BuildHooksEnv(self):
4696
    """Build hooks env.
4697

4698
    This runs on master, primary and secondary nodes of the instance.
4699

4700
    """
4701
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4702
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4703
    return env, nl, nl
4704

    
4705
  def CheckPrereq(self):
4706
    """Check prerequisites.
4707

4708
    This checks that the instance is in the cluster and is not running.
4709

4710
    """
4711
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4712
    assert instance is not None, \
4713
      "Cannot retrieve locked instance %s" % self.op.instance_name
4714
    _CheckNodeOnline(self, instance.primary_node)
4715

    
4716
    if instance.disk_template == constants.DT_DISKLESS:
4717
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4718
                                 self.op.instance_name,
4719
                                 errors.ECODE_INVAL)
4720
    _CheckInstanceDown(self, instance, "cannot reinstall")
4721

    
4722
    if self.op.os_type is not None:
4723
      # OS verification
4724
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4725
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4726

    
4727
    self.instance = instance
4728

    
4729
  def Exec(self, feedback_fn):
4730
    """Reinstall the instance.
4731

4732
    """
4733
    inst = self.instance
4734

    
4735
    if self.op.os_type is not None:
4736
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4737
      inst.os = self.op.os_type
4738
      self.cfg.Update(inst, feedback_fn)
4739

    
4740
    _StartInstanceDisks(self, inst, None)
4741
    try:
4742
      feedback_fn("Running the instance OS create scripts...")
4743
      # FIXME: pass debug option from opcode to backend
4744
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4745
                                             self.op.debug_level)
4746
      result.Raise("Could not install OS for instance %s on node %s" %
4747
                   (inst.name, inst.primary_node))
4748
    finally:
4749
      _ShutdownInstanceDisks(self, inst)
4750

    
4751

    
4752
class LURecreateInstanceDisks(LogicalUnit):
4753
  """Recreate an instance's missing disks.
4754

4755
  """
4756
  HPATH = "instance-recreate-disks"
4757
  HTYPE = constants.HTYPE_INSTANCE
4758
  _OP_PARAMS = [
4759
    _PInstanceName,
4760
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4761
    ]
4762
  REQ_BGL = False
4763

    
4764
  def ExpandNames(self):
4765
    self._ExpandAndLockInstance()
4766

    
4767
  def BuildHooksEnv(self):
4768
    """Build hooks env.
4769

4770
    This runs on master, primary and secondary nodes of the instance.
4771

4772
    """
4773
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4774
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4775
    return env, nl, nl
4776

    
4777
  def CheckPrereq(self):
4778
    """Check prerequisites.
4779

4780
    This checks that the instance is in the cluster and is not running.
4781

4782
    """
4783
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4784
    assert instance is not None, \
4785
      "Cannot retrieve locked instance %s" % self.op.instance_name
4786
    _CheckNodeOnline(self, instance.primary_node)
4787

    
4788
    if instance.disk_template == constants.DT_DISKLESS:
4789
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4790
                                 self.op.instance_name, errors.ECODE_INVAL)
4791
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4792

    
4793
    if not self.op.disks:
4794
      self.op.disks = range(len(instance.disks))
4795
    else:
4796
      for idx in self.op.disks:
4797
        if idx >= len(instance.disks):
4798
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4799
                                     errors.ECODE_INVAL)
4800

    
4801
    self.instance = instance
4802

    
4803
  def Exec(self, feedback_fn):
4804
    """Recreate the disks.
4805

4806
    """
4807
    to_skip = []
4808
    for idx, _ in enumerate(self.instance.disks):
4809
      if idx not in self.op.disks: # disk idx has not been passed in
4810
        to_skip.append(idx)
4811
        continue
4812

    
4813
    _CreateDisks(self, self.instance, to_skip=to_skip)
4814

    
4815

    
4816
class LURenameInstance(LogicalUnit):
4817
  """Rename an instance.
4818

4819
  """
4820
  HPATH = "instance-rename"
4821
  HTYPE = constants.HTYPE_INSTANCE
4822
  _OP_PARAMS = [
4823
    _PInstanceName,
4824
    ("new_name", _NoDefault, _TNonEmptyString),
4825
    ("ignore_ip", False, _TBool),
4826
    ("check_name", True, _TBool),
4827
    ]
4828

    
4829
  def BuildHooksEnv(self):
4830
    """Build hooks env.
4831

4832
    This runs on master, primary and secondary nodes of the instance.
4833

4834
    """
4835
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4836
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4837
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4838
    return env, nl, nl
4839

    
4840
  def CheckPrereq(self):
4841
    """Check prerequisites.
4842

4843
    This checks that the instance is in the cluster and is not running.
4844

4845
    """
4846
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4847
                                                self.op.instance_name)
4848
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4849
    assert instance is not None
4850
    _CheckNodeOnline(self, instance.primary_node)
4851
    _CheckInstanceDown(self, instance, "cannot rename")
4852
    self.instance = instance
4853

    
4854
    # new name verification
4855
    if self.op.check_name:
4856
      name_info = utils.GetHostInfo(self.op.new_name)
4857
      self.op.new_name = name_info.name
4858

    
4859
    new_name = self.op.new_name
4860

    
4861
    instance_list = self.cfg.GetInstanceList()
4862
    if new_name in instance_list:
4863
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4864
                                 new_name, errors.ECODE_EXISTS)
4865

    
4866
    if not self.op.ignore_ip:
4867
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4868
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4869
                                   (name_info.ip, new_name),
4870
                                   errors.ECODE_NOTUNIQUE)
4871

    
4872
  def Exec(self, feedback_fn):
4873
    """Reinstall the instance.
4874

4875
    """
4876
    inst = self.instance
4877
    old_name = inst.name
4878

    
4879
    if inst.disk_template == constants.DT_FILE:
4880
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4881

    
4882
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4883
    # Change the instance lock. This is definitely safe while we hold the BGL
4884
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4885
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4886

    
4887
    # re-read the instance from the configuration after rename
4888
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4889

    
4890
    if inst.disk_template == constants.DT_FILE:
4891
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4892
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4893
                                                     old_file_storage_dir,
4894
                                                     new_file_storage_dir)
4895
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4896
                   " (but the instance has been renamed in Ganeti)" %
4897
                   (inst.primary_node, old_file_storage_dir,
4898
                    new_file_storage_dir))
4899

    
4900
    _StartInstanceDisks(self, inst, None)
4901
    try:
4902
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4903
                                                 old_name, self.op.debug_level)
4904
      msg = result.fail_msg
4905
      if msg:
4906
        msg = ("Could not run OS rename script for instance %s on node %s"
4907
               " (but the instance has been renamed in Ganeti): %s" %
4908
               (inst.name, inst.primary_node, msg))
4909
        self.proc.LogWarning(msg)
4910
    finally:
4911
      _ShutdownInstanceDisks(self, inst)
4912

    
4913

    
4914
class LURemoveInstance(LogicalUnit):
4915
  """Remove an instance.
4916

4917
  """
4918
  HPATH = "instance-remove"
4919
  HTYPE = constants.HTYPE_INSTANCE
4920
  _OP_PARAMS = [
4921
    _PInstanceName,
4922
    ("ignore_failures", False, _TBool),
4923
    _PShutdownTimeout,
4924
    ]
4925
  REQ_BGL = False
4926

    
4927
  def ExpandNames(self):
4928
    self._ExpandAndLockInstance()
4929
    self.needed_locks[locking.LEVEL_NODE] = []
4930
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4931

    
4932
  def DeclareLocks(self, level):
4933
    if level == locking.LEVEL_NODE:
4934
      self._LockInstancesNodes()
4935

    
4936
  def BuildHooksEnv(self):
4937
    """Build hooks env.
4938

4939
    This runs on master, primary and secondary nodes of the instance.
4940

4941
    """
4942
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4943
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4944
    nl = [self.cfg.GetMasterNode()]
4945
    nl_post = list(self.instance.all_nodes) + nl
4946
    return env, nl, nl_post
4947

    
4948
  def CheckPrereq(self):
4949
    """Check prerequisites.
4950

4951
    This checks that the instance is in the cluster.
4952

4953
    """
4954
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4955
    assert self.instance is not None, \
4956
      "Cannot retrieve locked instance %s" % self.op.instance_name
4957

    
4958
  def Exec(self, feedback_fn):
4959
    """Remove the instance.
4960

4961
    """
4962
    instance = self.instance
4963
    logging.info("Shutting down instance %s on node %s",
4964
                 instance.name, instance.primary_node)
4965

    
4966
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4967
                                             self.op.shutdown_timeout)
4968
    msg = result.fail_msg
4969
    if msg:
4970
      if self.op.ignore_failures:
4971
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4972
      else:
4973
        raise errors.OpExecError("Could not shutdown instance %s on"
4974
                                 " node %s: %s" %
4975
                                 (instance.name, instance.primary_node, msg))
4976

    
4977
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4978

    
4979

    
4980
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4981
  """Utility function to remove an instance.
4982

4983
  """
4984
  logging.info("Removing block devices for instance %s", instance.name)
4985

    
4986
  if not _RemoveDisks(lu, instance):
4987
    if not ignore_failures:
4988
      raise errors.OpExecError("Can't remove instance's disks")
4989
    feedback_fn("Warning: can't remove instance's disks")
4990

    
4991
  logging.info("Removing instance %s out of cluster config", instance.name)
4992

    
4993
  lu.cfg.RemoveInstance(instance.name)
4994

    
4995
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4996
    "Instance lock removal conflict"
4997

    
4998
  # Remove lock for the instance
4999
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5000

    
5001

    
5002
class LUQueryInstances(NoHooksLU):
5003
  """Logical unit for querying instances.
5004

5005
  """
5006
  # pylint: disable-msg=W0142
5007
  _OP_PARAMS = [
5008
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5009
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5010
    ("use_locking", False, _TBool),
5011
    ]
5012
  REQ_BGL = False
5013
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5014
                    "serial_no", "ctime", "mtime", "uuid"]
5015
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5016
                                    "admin_state",
5017
                                    "disk_template", "ip", "mac", "bridge",
5018
                                    "nic_mode", "nic_link",
5019
                                    "sda_size", "sdb_size", "vcpus", "tags",
5020
                                    "network_port", "beparams",
5021
                                    r"(disk)\.(size)/([0-9]+)",
5022
                                    r"(disk)\.(sizes)", "disk_usage",
5023
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5024
                                    r"(nic)\.(bridge)/([0-9]+)",
5025
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5026
                                    r"(disk|nic)\.(count)",
5027
                                    "hvparams",
5028
                                    ] + _SIMPLE_FIELDS +
5029
                                  ["hv/%s" % name
5030
                                   for name in constants.HVS_PARAMETERS
5031
                                   if name not in constants.HVC_GLOBALS] +
5032
                                  ["be/%s" % name
5033
                                   for name in constants.BES_PARAMETERS])
5034
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
5035

    
5036

    
5037
  def CheckArguments(self):
5038
    _CheckOutputFields(static=self._FIELDS_STATIC,
5039
                       dynamic=self._FIELDS_DYNAMIC,
5040
                       selected=self.op.output_fields)
5041

    
5042
  def ExpandNames(self):
5043
    self.needed_locks = {}
5044
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5045
    self.share_locks[locking.LEVEL_NODE] = 1
5046

    
5047
    if self.op.names:
5048
      self.wanted = _GetWantedInstances(self, self.op.names)
5049
    else:
5050
      self.wanted = locking.ALL_SET
5051

    
5052
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5053
    self.do_locking = self.do_node_query and self.op.use_locking
5054
    if self.do_locking:
5055
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5056
      self.needed_locks[locking.LEVEL_NODE] = []
5057
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5058

    
5059
  def DeclareLocks(self, level):
5060
    if level == locking.LEVEL_NODE and self.do_locking:
5061
      self._LockInstancesNodes()
5062

    
5063
  def Exec(self, feedback_fn):
5064
    """Computes the list of nodes and their attributes.
5065

5066
    """
5067
    # pylint: disable-msg=R0912
5068
    # way too many branches here
5069
    all_info = self.cfg.GetAllInstancesInfo()
5070
    if self.wanted == locking.ALL_SET:
5071
      # caller didn't specify instance names, so ordering is not important
5072
      if self.do_locking:
5073
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5074
      else:
5075
        instance_names = all_info.keys()
5076
      instance_names = utils.NiceSort(instance_names)
5077
    else:
5078
      # caller did specify names, so we must keep the ordering
5079
      if self.do_locking:
5080
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5081
      else:
5082
        tgt_set = all_info.keys()
5083
      missing = set(self.wanted).difference(tgt_set)
5084
      if missing:
5085
        raise errors.OpExecError("Some instances were removed before"
5086
                                 " retrieving their data: %s" % missing)
5087
      instance_names = self.wanted
5088

    
5089
    instance_list = [all_info[iname] for iname in instance_names]
5090

    
5091
    # begin data gathering
5092

    
5093
    nodes = frozenset([inst.primary_node for inst in instance_list])
5094
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5095

    
5096
    bad_nodes = []
5097
    off_nodes = []
5098
    if self.do_node_query:
5099
      live_data = {}
5100
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5101
      for name in nodes:
5102
        result = node_data[name]
5103
        if result.offline:
5104
          # offline nodes will be in both lists
5105
          off_nodes.append(name)
5106
        if result.fail_msg:
5107
          bad_nodes.append(name)
5108
        else:
5109
          if result.payload:
5110
            live_data.update(result.payload)
5111
          # else no instance is alive
5112
    else:
5113
      live_data = dict([(name, {}) for name in instance_names])
5114

    
5115
    # end data gathering
5116

    
5117
    HVPREFIX = "hv/"
5118
    BEPREFIX = "be/"
5119
    output = []
5120
    cluster = self.cfg.GetClusterInfo()
5121
    for instance in instance_list:
5122
      iout = []
5123
      i_hv = cluster.FillHV(instance, skip_globals=True)
5124
      i_be = cluster.FillBE(instance)
5125
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5126
      for field in self.op.output_fields:
5127
        st_match = self._FIELDS_STATIC.Matches(field)
5128
        if field in self._SIMPLE_FIELDS:
5129
          val = getattr(instance, field)
5130
        elif field == "pnode":
5131
          val = instance.primary_node
5132
        elif field == "snodes":
5133
          val = list(instance.secondary_nodes)
5134
        elif field == "admin_state":
5135
          val = instance.admin_up
5136
        elif field == "oper_state":
5137
          if instance.primary_node in bad_nodes:
5138
            val = None
5139
          else:
5140
            val = bool(live_data.get(instance.name))
5141
        elif field == "status":
5142
          if instance.primary_node in off_nodes:
5143
            val = "ERROR_nodeoffline"
5144
          elif instance.primary_node in bad_nodes:
5145
            val = "ERROR_nodedown"
5146
          else:
5147
            running = bool(live_data.get(instance.name))
5148
            if running:
5149
              if instance.admin_up:
5150
                val = "running"
5151
              else:
5152
                val = "ERROR_up"
5153
            else:
5154
              if instance.admin_up:
5155
                val = "ERROR_down"
5156
              else:
5157
                val = "ADMIN_down"
5158
        elif field == "oper_ram":
5159
          if instance.primary_node in bad_nodes:
5160
            val = None
5161
          elif instance.name in live_data:
5162
            val = live_data[instance.name].get("memory", "?")
5163
          else:
5164
            val = "-"
5165
        elif field == "vcpus":
5166
          val = i_be[constants.BE_VCPUS]
5167
        elif field == "disk_template":
5168
          val = instance.disk_template
5169
        elif field == "ip":
5170
          if instance.nics:
5171
            val = instance.nics[0].ip
5172
          else:
5173
            val = None
5174
        elif field == "nic_mode":
5175
          if instance.nics:
5176
            val = i_nicp[0][constants.NIC_MODE]
5177
          else:
5178
            val = None
5179
        elif field == "nic_link":
5180
          if instance.nics:
5181
            val = i_nicp[0][constants.NIC_LINK]
5182
          else:
5183
            val = None
5184
        elif field == "bridge":
5185
          if (instance.nics and
5186
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5187
            val = i_nicp[0][constants.NIC_LINK]
5188
          else:
5189
            val = None
5190
        elif field == "mac":
5191
          if instance.nics:
5192
            val = instance.nics[0].mac
5193
          else:
5194
            val = None
5195
        elif field == "sda_size" or field == "sdb_size":
5196
          idx = ord(field[2]) - ord('a')
5197
          try:
5198
            val = instance.FindDisk(idx).size
5199
          except errors.OpPrereqError:
5200
            val = None
5201
        elif field == "disk_usage": # total disk usage per node
5202
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5203
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5204
        elif field == "tags":
5205
          val = list(instance.GetTags())
5206
        elif field == "hvparams":
5207
          val = i_hv
5208
        elif (field.startswith(HVPREFIX) and
5209
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5210
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5211
          val = i_hv.get(field[len(HVPREFIX):], None)
5212
        elif field == "beparams":
5213
          val = i_be
5214
        elif (field.startswith(BEPREFIX) and
5215
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5216
          val = i_be.get(field[len(BEPREFIX):], None)
5217
        elif st_match and st_match.groups():
5218
          # matches a variable list
5219
          st_groups = st_match.groups()
5220
          if st_groups and st_groups[0] == "disk":
5221
            if st_groups[1] == "count":
5222
              val = len(instance.disks)
5223
            elif st_groups[1] == "sizes":
5224
              val = [disk.size for disk in instance.disks]
5225
            elif st_groups[1] == "size":
5226
              try:
5227
                val = instance.FindDisk(st_groups[2]).size
5228
              except errors.OpPrereqError:
5229
                val = None
5230
            else:
5231
              assert False, "Unhandled disk parameter"
5232
          elif st_groups[0] == "nic":
5233
            if st_groups[1] == "count":
5234
              val = len(instance.nics)
5235
            elif st_groups[1] == "macs":
5236
              val = [nic.mac for nic in instance.nics]
5237
            elif st_groups[1] == "ips":
5238
              val = [nic.ip for nic in instance.nics]
5239
            elif st_groups[1] == "modes":
5240
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5241
            elif st_groups[1] == "links":
5242
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5243
            elif st_groups[1] == "bridges":
5244
              val = []
5245
              for nicp in i_nicp:
5246
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5247
                  val.append(nicp[constants.NIC_LINK])
5248
                else:
5249
                  val.append(None)
5250
            else:
5251
              # index-based item
5252
              nic_idx = int(st_groups[2])
5253
              if nic_idx >= len(instance.nics):
5254
                val = None
5255
              else:
5256
                if st_groups[1] == "mac":
5257
                  val = instance.nics[nic_idx].mac
5258
                elif st_groups[1] == "ip":
5259
                  val = instance.nics[nic_idx].ip
5260
                elif st_groups[1] == "mode":
5261
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5262
                elif st_groups[1] == "link":
5263
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5264
                elif st_groups[1] == "bridge":
5265
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5266
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5267
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5268
                  else:
5269
                    val = None
5270
                else:
5271
                  assert False, "Unhandled NIC parameter"
5272
          else:
5273
            assert False, ("Declared but unhandled variable parameter '%s'" %
5274
                           field)
5275
        else:
5276
          assert False, "Declared but unhandled parameter '%s'" % field
5277
        iout.append(val)
5278
      output.append(iout)
5279

    
5280
    return output
5281

    
5282

    
5283
class LUFailoverInstance(LogicalUnit):
5284
  """Failover an instance.
5285

5286
  """
5287
  HPATH = "instance-failover"
5288
  HTYPE = constants.HTYPE_INSTANCE
5289
  _OP_PARAMS = [
5290
    _PInstanceName,
5291
    ("ignore_consistency", False, _TBool),
5292
    _PShutdownTimeout,
5293
    ]
5294
  REQ_BGL = False
5295

    
5296
  def ExpandNames(self):
5297
    self._ExpandAndLockInstance()
5298
    self.needed_locks[locking.LEVEL_NODE] = []
5299
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5300

    
5301
  def DeclareLocks(self, level):
5302
    if level == locking.LEVEL_NODE:
5303
      self._LockInstancesNodes()
5304

    
5305
  def BuildHooksEnv(self):
5306
    """Build hooks env.
5307

5308
    This runs on master, primary and secondary nodes of the instance.
5309

5310
    """
5311
    instance = self.instance
5312
    source_node = instance.primary_node
5313
    target_node = instance.secondary_nodes[0]
5314
    env = {
5315
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5316
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5317
      "OLD_PRIMARY": source_node,
5318
      "OLD_SECONDARY": target_node,
5319
      "NEW_PRIMARY": target_node,
5320
      "NEW_SECONDARY": source_node,
5321
      }
5322
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5323
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5324
    nl_post = list(nl)
5325
    nl_post.append(source_node)
5326
    return env, nl, nl_post
5327

    
5328
  def CheckPrereq(self):
5329
    """Check prerequisites.
5330

5331
    This checks that the instance is in the cluster.
5332

5333
    """
5334
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5335
    assert self.instance is not None, \
5336
      "Cannot retrieve locked instance %s" % self.op.instance_name
5337

    
5338
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5339
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5340
      raise errors.OpPrereqError("Instance's disk layout is not"
5341
                                 " network mirrored, cannot failover.",
5342
                                 errors.ECODE_STATE)
5343

    
5344
    secondary_nodes = instance.secondary_nodes
5345
    if not secondary_nodes:
5346
      raise errors.ProgrammerError("no secondary node but using "
5347
                                   "a mirrored disk template")
5348

    
5349
    target_node = secondary_nodes[0]
5350
    _CheckNodeOnline(self, target_node)
5351
    _CheckNodeNotDrained(self, target_node)
5352
    if instance.admin_up:
5353
      # check memory requirements on the secondary node
5354
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5355
                           instance.name, bep[constants.BE_MEMORY],
5356
                           instance.hypervisor)
5357
    else:
5358
      self.LogInfo("Not checking memory on the secondary node as"
5359
                   " instance will not be started")
5360

    
5361
    # check bridge existance
5362
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5363

    
5364
  def Exec(self, feedback_fn):
5365
    """Failover an instance.
5366

5367
    The failover is done by shutting it down on its present node and
5368
    starting it on the secondary.
5369

5370
    """
5371
    instance = self.instance
5372

    
5373
    source_node = instance.primary_node
5374
    target_node = instance.secondary_nodes[0]
5375

    
5376
    if instance.admin_up:
5377
      feedback_fn("* checking disk consistency between source and target")
5378
      for dev in instance.disks:
5379
        # for drbd, these are drbd over lvm
5380
        if not _CheckDiskConsistency(self, dev, target_node, False):
5381
          if not self.op.ignore_consistency:
5382
            raise errors.OpExecError("Disk %s is degraded on target node,"
5383
                                     " aborting failover." % dev.iv_name)
5384
    else:
5385
      feedback_fn("* not checking disk consistency as instance is not running")
5386

    
5387
    feedback_fn("* shutting down instance on source node")
5388
    logging.info("Shutting down instance %s on node %s",
5389
                 instance.name, source_node)
5390

    
5391
    result = self.rpc.call_instance_shutdown(source_node, instance,
5392
                                             self.op.shutdown_timeout)
5393
    msg = result.fail_msg
5394
    if msg:
5395
      if self.op.ignore_consistency:
5396
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5397
                             " Proceeding anyway. Please make sure node"
5398
                             " %s is down. Error details: %s",
5399
                             instance.name, source_node, source_node, msg)
5400
      else:
5401
        raise errors.OpExecError("Could not shutdown instance %s on"
5402
                                 " node %s: %s" %
5403
                                 (instance.name, source_node, msg))
5404

    
5405
    feedback_fn("* deactivating the instance's disks on source node")
5406
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5407
      raise errors.OpExecError("Can't shut down the instance's disks.")
5408

    
5409
    instance.primary_node = target_node
5410
    # distribute new instance config to the other nodes
5411
    self.cfg.Update(instance, feedback_fn)
5412

    
5413
    # Only start the instance if it's marked as up
5414
    if instance.admin_up:
5415
      feedback_fn("* activating the instance's disks on target node")
5416
      logging.info("Starting instance %s on node %s",
5417
                   instance.name, target_node)
5418

    
5419
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5420
                                           ignore_secondaries=True)
5421
      if not disks_ok:
5422
        _ShutdownInstanceDisks(self, instance)
5423
        raise errors.OpExecError("Can't activate the instance's disks")
5424

    
5425
      feedback_fn("* starting the instance on the target node")
5426
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5427
      msg = result.fail_msg
5428
      if msg:
5429
        _ShutdownInstanceDisks(self, instance)
5430
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5431
                                 (instance.name, target_node, msg))
5432

    
5433

    
5434
class LUMigrateInstance(LogicalUnit):
5435
  """Migrate an instance.
5436

5437
  This is migration without shutting down, compared to the failover,
5438
  which is done with shutdown.
5439

5440
  """
5441
  HPATH = "instance-migrate"
5442
  HTYPE = constants.HTYPE_INSTANCE
5443
  _OP_PARAMS = [
5444
    _PInstanceName,
5445
    ("live", True, _TBool),
5446
    ("cleanup", False, _TBool),
5447
    ]
5448

    
5449
  REQ_BGL = False
5450

    
5451
  def ExpandNames(self):
5452
    self._ExpandAndLockInstance()
5453

    
5454
    self.needed_locks[locking.LEVEL_NODE] = []
5455
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5456

    
5457
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5458
                                       self.op.live, self.op.cleanup)
5459
    self.tasklets = [self._migrater]
5460

    
5461
  def DeclareLocks(self, level):
5462
    if level == locking.LEVEL_NODE:
5463
      self._LockInstancesNodes()
5464

    
5465
  def BuildHooksEnv(self):
5466
    """Build hooks env.
5467

5468
    This runs on master, primary and secondary nodes of the instance.
5469

5470
    """
5471
    instance = self._migrater.instance
5472
    source_node = instance.primary_node
5473
    target_node = instance.secondary_nodes[0]
5474
    env = _BuildInstanceHookEnvByObject(self, instance)
5475
    env["MIGRATE_LIVE"] = self.op.live
5476
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5477
    env.update({
5478
        "OLD_PRIMARY": source_node,
5479
        "OLD_SECONDARY": target_node,
5480
        "NEW_PRIMARY": target_node,
5481
        "NEW_SECONDARY": source_node,
5482
        })
5483
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5484
    nl_post = list(nl)
5485
    nl_post.append(source_node)
5486
    return env, nl, nl_post
5487

    
5488

    
5489
class LUMoveInstance(LogicalUnit):
5490
  """Move an instance by data-copying.
5491

5492
  """
5493
  HPATH = "instance-move"
5494
  HTYPE = constants.HTYPE_INSTANCE
5495
  _OP_PARAMS = [
5496
    _PInstanceName,
5497
    ("target_node", _NoDefault, _TNonEmptyString),
5498
    _PShutdownTimeout,
5499
    ]
5500
  REQ_BGL = False
5501

    
5502
  def ExpandNames(self):
5503
    self._ExpandAndLockInstance()
5504
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5505
    self.op.target_node = target_node
5506
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5507
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5508

    
5509
  def DeclareLocks(self, level):
5510
    if level == locking.LEVEL_NODE:
5511
      self._LockInstancesNodes(primary_only=True)
5512

    
5513
  def BuildHooksEnv(self):
5514
    """Build hooks env.
5515

5516
    This runs on master, primary and secondary nodes of the instance.
5517

5518
    """
5519
    env = {
5520
      "TARGET_NODE": self.op.target_node,
5521
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5522
      }
5523
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5524
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5525
                                       self.op.target_node]
5526
    return env, nl, nl
5527

    
5528
  def CheckPrereq(self):
5529
    """Check prerequisites.
5530

5531
    This checks that the instance is in the cluster.
5532

5533
    """
5534
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5535
    assert self.instance is not None, \
5536
      "Cannot retrieve locked instance %s" % self.op.instance_name
5537

    
5538
    node = self.cfg.GetNodeInfo(self.op.target_node)
5539
    assert node is not None, \
5540
      "Cannot retrieve locked node %s" % self.op.target_node
5541

    
5542
    self.target_node = target_node = node.name
5543

    
5544
    if target_node == instance.primary_node:
5545
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5546
                                 (instance.name, target_node),
5547
                                 errors.ECODE_STATE)
5548

    
5549
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5550

    
5551
    for idx, dsk in enumerate(instance.disks):
5552
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5553
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5554
                                   " cannot copy" % idx, errors.ECODE_STATE)
5555

    
5556
    _CheckNodeOnline(self, target_node)
5557
    _CheckNodeNotDrained(self, target_node)
5558

    
5559
    if instance.admin_up:
5560
      # check memory requirements on the secondary node
5561
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5562
                           instance.name, bep[constants.BE_MEMORY],
5563
                           instance.hypervisor)
5564
    else:
5565
      self.LogInfo("Not checking memory on the secondary node as"
5566
                   " instance will not be started")
5567

    
5568
    # check bridge existance
5569
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5570

    
5571
  def Exec(self, feedback_fn):
5572
    """Move an instance.
5573

5574
    The move is done by shutting it down on its present node, copying
5575
    the data over (slow) and starting it on the new node.
5576

5577
    """
5578
    instance = self.instance
5579

    
5580
    source_node = instance.primary_node
5581
    target_node = self.target_node
5582

    
5583
    self.LogInfo("Shutting down instance %s on source node %s",
5584
                 instance.name, source_node)
5585

    
5586
    result = self.rpc.call_instance_shutdown(source_node, instance,
5587
                                             self.op.shutdown_timeout)
5588
    msg = result.fail_msg
5589
    if msg:
5590
      if self.op.ignore_consistency:
5591
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5592
                             " Proceeding anyway. Please make sure node"
5593
                             " %s is down. Error details: %s",
5594
                             instance.name, source_node, source_node, msg)
5595
      else:
5596
        raise errors.OpExecError("Could not shutdown instance %s on"
5597
                                 " node %s: %s" %
5598
                                 (instance.name, source_node, msg))
5599

    
5600
    # create the target disks
5601
    try:
5602
      _CreateDisks(self, instance, target_node=target_node)
5603
    except errors.OpExecError:
5604
      self.LogWarning("Device creation failed, reverting...")
5605
      try:
5606
        _RemoveDisks(self, instance, target_node=target_node)
5607
      finally:
5608
        self.cfg.ReleaseDRBDMinors(instance.name)
5609
        raise
5610

    
5611
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5612

    
5613
    errs = []
5614
    # activate, get path, copy the data over
5615
    for idx, disk in enumerate(instance.disks):
5616
      self.LogInfo("Copying data for disk %d", idx)
5617
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5618
                                               instance.name, True)
5619
      if result.fail_msg:
5620
        self.LogWarning("Can't assemble newly created disk %d: %s",
5621
                        idx, result.fail_msg)
5622
        errs.append(result.fail_msg)
5623
        break
5624
      dev_path = result.payload
5625
      result = self.rpc.call_blockdev_export(source_node, disk,
5626
                                             target_node, dev_path,
5627
                                             cluster_name)
5628
      if result.fail_msg:
5629
        self.LogWarning("Can't copy data over for disk %d: %s",
5630
                        idx, result.fail_msg)
5631
        errs.append(result.fail_msg)
5632
        break
5633

    
5634
    if errs:
5635
      self.LogWarning("Some disks failed to copy, aborting")
5636
      try:
5637
        _RemoveDisks(self, instance, target_node=target_node)
5638
      finally:
5639
        self.cfg.ReleaseDRBDMinors(instance.name)
5640
        raise errors.OpExecError("Errors during disk copy: %s" %
5641
                                 (",".join(errs),))
5642

    
5643
    instance.primary_node = target_node
5644
    self.cfg.Update(instance, feedback_fn)
5645

    
5646
    self.LogInfo("Removing the disks on the original node")
5647
    _RemoveDisks(self, instance, target_node=source_node)
5648

    
5649
    # Only start the instance if it's marked as up
5650
    if instance.admin_up:
5651
      self.LogInfo("Starting instance %s on node %s",
5652
                   instance.name, target_node)
5653

    
5654
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5655
                                           ignore_secondaries=True)
5656
      if not disks_ok:
5657
        _ShutdownInstanceDisks(self, instance)
5658
        raise errors.OpExecError("Can't activate the instance's disks")
5659

    
5660
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5661
      msg = result.fail_msg
5662
      if msg:
5663
        _ShutdownInstanceDisks(self, instance)
5664
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5665
                                 (instance.name, target_node, msg))
5666

    
5667

    
5668
class LUMigrateNode(LogicalUnit):
5669
  """Migrate all instances from a node.
5670

5671
  """
5672
  HPATH = "node-migrate"
5673
  HTYPE = constants.HTYPE_NODE
5674
  _OP_PARAMS = [
5675
    _PNodeName,
5676
    ("live", False, _TBool),
5677
    ]
5678
  REQ_BGL = False
5679

    
5680
  def ExpandNames(self):
5681
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5682

    
5683
    self.needed_locks = {
5684
      locking.LEVEL_NODE: [self.op.node_name],
5685
      }
5686

    
5687
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5688

    
5689
    # Create tasklets for migrating instances for all instances on this node
5690
    names = []
5691
    tasklets = []
5692

    
5693
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5694
      logging.debug("Migrating instance %s", inst.name)
5695
      names.append(inst.name)
5696

    
5697
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5698

    
5699
    self.tasklets = tasklets
5700

    
5701
    # Declare instance locks
5702
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5703

    
5704
  def DeclareLocks(self, level):
5705
    if level == locking.LEVEL_NODE:
5706
      self._LockInstancesNodes()
5707

    
5708
  def BuildHooksEnv(self):
5709
    """Build hooks env.
5710

5711
    This runs on the master, the primary and all the secondaries.
5712

5713
    """
5714
    env = {
5715
      "NODE_NAME": self.op.node_name,
5716
      }
5717

    
5718
    nl = [self.cfg.GetMasterNode()]
5719

    
5720
    return (env, nl, nl)
5721

    
5722

    
5723
class TLMigrateInstance(Tasklet):
5724
  def __init__(self, lu, instance_name, live, cleanup):
5725
    """Initializes this class.
5726

5727
    """
5728
    Tasklet.__init__(self, lu)
5729

    
5730
    # Parameters
5731
    self.instance_name = instance_name
5732
    self.live = live
5733
    self.cleanup = cleanup
5734

    
5735
  def CheckPrereq(self):
5736
    """Check prerequisites.
5737

5738
    This checks that the instance is in the cluster.
5739

5740
    """
5741
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5742
    instance = self.cfg.GetInstanceInfo(instance_name)
5743
    assert instance is not None
5744

    
5745
    if instance.disk_template != constants.DT_DRBD8:
5746
      raise errors.OpPrereqError("Instance's disk layout is not"
5747
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5748

    
5749
    secondary_nodes = instance.secondary_nodes
5750
    if not secondary_nodes:
5751
      raise errors.ConfigurationError("No secondary node but using"
5752
                                      " drbd8 disk template")
5753

    
5754
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5755

    
5756
    target_node = secondary_nodes[0]
5757
    # check memory requirements on the secondary node
5758
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5759
                         instance.name, i_be[constants.BE_MEMORY],
5760
                         instance.hypervisor)
5761

    
5762
    # check bridge existence
5763
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5764

    
5765
    if not self.cleanup:
5766
      _CheckNodeNotDrained(self.lu, target_node)
5767
      result = self.rpc.call_instance_migratable(instance.primary_node,
5768
                                                 instance)
5769
      result.Raise("Can't migrate, please use failover",
5770
                   prereq=True, ecode=errors.ECODE_STATE)
5771

    
5772
    self.instance = instance
5773

    
5774
  def _WaitUntilSync(self):
5775
    """Poll with custom rpc for disk sync.
5776

5777
    This uses our own step-based rpc call.
5778

5779
    """
5780
    self.feedback_fn("* wait until resync is done")
5781
    all_done = False
5782
    while not all_done:
5783
      all_done = True
5784
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5785
                                            self.nodes_ip,
5786
                                            self.instance.disks)
5787
      min_percent = 100
5788
      for node, nres in result.items():
5789
        nres.Raise("Cannot resync disks on node %s" % node)
5790
        node_done, node_percent = nres.payload
5791
        all_done = all_done and node_done
5792
        if node_percent is not None:
5793
          min_percent = min(min_percent, node_percent)
5794
      if not all_done:
5795
        if min_percent < 100:
5796
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5797
        time.sleep(2)
5798

    
5799
  def _EnsureSecondary(self, node):
5800
    """Demote a node to secondary.
5801

5802
    """
5803
    self.feedback_fn("* switching node %s to secondary mode" % node)
5804

    
5805
    for dev in self.instance.disks:
5806
      self.cfg.SetDiskID(dev, node)
5807

    
5808
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5809
                                          self.instance.disks)
5810
    result.Raise("Cannot change disk to secondary on node %s" % node)
5811

    
5812
  def _GoStandalone(self):
5813
    """Disconnect from the network.
5814

5815
    """
5816
    self.feedback_fn("* changing into standalone mode")
5817
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5818
                                               self.instance.disks)
5819
    for node, nres in result.items():
5820
      nres.Raise("Cannot disconnect disks node %s" % node)
5821

    
5822
  def _GoReconnect(self, multimaster):
5823
    """Reconnect to the network.
5824

5825
    """
5826
    if multimaster:
5827
      msg = "dual-master"
5828
    else:
5829
      msg = "single-master"
5830
    self.feedback_fn("* changing disks into %s mode" % msg)
5831
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5832
                                           self.instance.disks,
5833
                                           self.instance.name, multimaster)
5834
    for node, nres in result.items():
5835
      nres.Raise("Cannot change disks config on node %s" % node)
5836

    
5837
  def _ExecCleanup(self):
5838
    """Try to cleanup after a failed migration.
5839

5840
    The cleanup is done by:
5841
      - check that the instance is running only on one node
5842
        (and update the config if needed)
5843
      - change disks on its secondary node to secondary
5844
      - wait until disks are fully synchronized
5845
      - disconnect from the network
5846
      - change disks into single-master mode
5847
      - wait again until disks are fully synchronized
5848

5849
    """
5850
    instance = self.instance
5851
    target_node = self.target_node
5852
    source_node = self.source_node
5853

    
5854
    # check running on only one node
5855
    self.feedback_fn("* checking where the instance actually runs"
5856
                     " (if this hangs, the hypervisor might be in"
5857
                     " a bad state)")
5858
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5859
    for node, result in ins_l.items():
5860
      result.Raise("Can't contact node %s" % node)
5861

    
5862
    runningon_source = instance.name in ins_l[source_node].payload
5863
    runningon_target = instance.name in ins_l[target_node].payload
5864

    
5865
    if runningon_source and runningon_target:
5866
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5867
                               " or the hypervisor is confused. You will have"
5868
                               " to ensure manually that it runs only on one"
5869
                               " and restart this operation.")
5870

    
5871
    if not (runningon_source or runningon_target):
5872
      raise errors.OpExecError("Instance does not seem to be running at all."
5873
                               " In this case, it's safer to repair by"
5874
                               " running 'gnt-instance stop' to ensure disk"
5875
                               " shutdown, and then restarting it.")
5876

    
5877
    if runningon_target:
5878
      # the migration has actually succeeded, we need to update the config
5879
      self.feedback_fn("* instance running on secondary node (%s),"
5880
                       " updating config" % target_node)
5881
      instance.primary_node = target_node
5882
      self.cfg.Update(instance, self.feedback_fn)
5883
      demoted_node = source_node
5884
    else:
5885
      self.feedback_fn("* instance confirmed to be running on its"
5886
                       " primary node (%s)" % source_node)
5887
      demoted_node = target_node
5888

    
5889
    self._EnsureSecondary(demoted_node)
5890
    try:
5891
      self._WaitUntilSync()
5892
    except errors.OpExecError:
5893
      # we ignore here errors, since if the device is standalone, it
5894
      # won't be able to sync
5895
      pass
5896
    self._GoStandalone()
5897
    self._GoReconnect(False)
5898
    self._WaitUntilSync()
5899

    
5900
    self.feedback_fn("* done")
5901

    
5902
  def _RevertDiskStatus(self):
5903
    """Try to revert the disk status after a failed migration.
5904

5905
    """
5906
    target_node = self.target_node
5907
    try:
5908
      self._EnsureSecondary(target_node)
5909
      self._GoStandalone()
5910
      self._GoReconnect(False)
5911
      self._WaitUntilSync()
5912
    except errors.OpExecError, err:
5913
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5914
                         " drives: error '%s'\n"
5915
                         "Please look and recover the instance status" %
5916
                         str(err))
5917

    
5918
  def _AbortMigration(self):
5919
    """Call the hypervisor code to abort a started migration.
5920

5921
    """
5922
    instance = self.instance
5923
    target_node = self.target_node
5924
    migration_info = self.migration_info
5925

    
5926
    abort_result = self.rpc.call_finalize_migration(target_node,
5927
                                                    instance,
5928
                                                    migration_info,
5929
                                                    False)
5930
    abort_msg = abort_result.fail_msg
5931
    if abort_msg:
5932
      logging.error("Aborting migration failed on target node %s: %s",
5933
                    target_node, abort_msg)
5934
      # Don't raise an exception here, as we still have to try to revert the
5935
      # disk status, even if this step failed.
5936

    
5937
  def _ExecMigration(self):
5938
    """Migrate an instance.
5939

5940
    The migration is done by:
5941
      - change the disks into dual-master mode
5942
      - wait until disks are fully synchronized again
5943
      - migrate the instance
5944
      - change disks on the new secondary node (the old primary) to secondary
5945
      - wait until disks are fully synchronized
5946
      - change disks into single-master mode
5947

5948
    """
5949
    instance = self.instance
5950
    target_node = self.target_node
5951
    source_node = self.source_node
5952

    
5953
    self.feedback_fn("* checking disk consistency between source and target")
5954
    for dev in instance.disks:
5955
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5956
        raise errors.OpExecError("Disk %s is degraded or not fully"
5957
                                 " synchronized on target node,"
5958
                                 " aborting migrate." % dev.iv_name)
5959

    
5960
    # First get the migration information from the remote node
5961
    result = self.rpc.call_migration_info(source_node, instance)
5962
    msg = result.fail_msg
5963
    if msg:
5964
      log_err = ("Failed fetching source migration information from %s: %s" %
5965
                 (source_node, msg))
5966
      logging.error(log_err)
5967
      raise errors.OpExecError(log_err)
5968

    
5969
    self.migration_info = migration_info = result.payload
5970

    
5971
    # Then switch the disks to master/master mode
5972
    self._EnsureSecondary(target_node)
5973
    self._GoStandalone()
5974
    self._GoReconnect(True)
5975
    self._WaitUntilSync()
5976

    
5977
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5978
    result = self.rpc.call_accept_instance(target_node,
5979
                                           instance,
5980
                                           migration_info,
5981
                                           self.nodes_ip[target_node])
5982

    
5983
    msg = result.fail_msg
5984
    if msg:
5985
      logging.error("Instance pre-migration failed, trying to revert"
5986
                    " disk status: %s", msg)
5987
      self.feedback_fn("Pre-migration failed, aborting")
5988
      self._AbortMigration()
5989
      self._RevertDiskStatus()
5990
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5991
                               (instance.name, msg))
5992

    
5993
    self.feedback_fn("* migrating instance to %s" % target_node)
5994
5995
    result = self.rpc.call_instance_migrate(source_node, instance,
5996
                                            self.nodes_ip[target_node],
5997
                                            self.live)
5998
    msg = result.fail_msg
5999
    if msg:
6000
      logging.error("Instance migration failed, trying to revert"
6001
                    " disk status: %s", msg)
6002
      self.feedback_fn("Migration failed, aborting")
6003
      self._AbortMigration()
6004
      self._RevertDiskStatus()
6005
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6006
                               (instance.name, msg))
6007
6008

    
6009
    instance.primary_node = target_node
6010
    # distribute new instance config to the other nodes
6011
    self.cfg.Update(instance, self.feedback_fn)
6012

    
6013
    result = self.rpc.call_finalize_migration(target_node,
6014
                                              instance,
6015
                                              migration_info,
6016
                                              True)
6017
    msg = result.fail_msg
6018
    if msg:
6019
      logging.error("Instance migration succeeded, but finalization failed:"
6020
                    " %s", msg)
6021
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6022
                               msg)
6023

    
6024
    self._EnsureSecondary(source_node)
6025
    self._WaitUntilSync()
6026
    self._GoStandalone()
6027
    self._GoReconnect(False)
6028
    self._WaitUntilSync()
6029

    
6030
    self.feedback_fn("* done")
6031

    
6032
  def Exec(self, feedback_fn):
6033
    """Perform the migration.
6034

6035
    """
6036
    feedback_fn("Migrating instance %s" % self.instance.name)
6037

    
6038
    self.feedback_fn = feedback_fn
6039

    
6040
    self.source_node = self.instance.primary_node
6041
    self.target_node = self.instance.secondary_nodes[0]
6042
    self.all_nodes = [self.source_node, self.target_node]
6043
    self.nodes_ip = {
6044
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6045
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6046
      }
6047

    
6048
    if self.cleanup:
6049
      return self._ExecCleanup()
6050
    else:
6051
      return self._ExecMigration()
6052

    
6053
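# Illustrative usage sketch (hypothetical, not part of the original module):
# TLMigrateInstance above is normally created by an LU (see
# LUMigrateNode.ExpandNames) and run by the job machinery, which also handles
# locking.  The wrapper below only shows the tasklet's calling convention; the
# name _DemoMigrateOneInstance and its arguments are invented for this example.
def _DemoMigrateOneInstance(lu, instance_name, feedback_fn, live=True):
  """Run a single live migration through the TLMigrateInstance tasklet."""
  tasklet = TLMigrateInstance(lu, instance_name, live, cleanup=False)
  # resolves the instance and checks the DRBD8 template, target node memory
  # and drain state
  tasklet.CheckPrereq()
  # switches disks to dual-master, migrates, demotes the old primary and
  # returns the disks to single-master mode (see _ExecMigration above)
  return tasklet.Exec(feedback_fn)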

    
6054
def _CreateBlockDev(lu, node, instance, device, force_create,
6055
                    info, force_open):
6056
  """Create a tree of block devices on a given node.
6057

6058
  If this device type has to be created on secondaries, create it and
6059
  all its children.
6060

6061
  If not, just recurse to children keeping the same 'force' value.
6062

6063
  @param lu: the lu on whose behalf we execute
6064
  @param node: the node on which to create the device
6065
  @type instance: L{objects.Instance}
6066
  @param instance: the instance which owns the device
6067
  @type device: L{objects.Disk}
6068
  @param device: the device to create
6069
  @type force_create: boolean
6070
  @param force_create: whether to force creation of this device; this
6071
      will be changed to True whenever we find a device which has
6072
      the CreateOnSecondary() attribute
6073
  @param info: the extra 'metadata' we should attach to the device
6074
      (this will be represented as a LVM tag)
6075
  @type force_open: boolean
6076
  @param force_open: this parameter will be passed to the
6077
      L{backend.BlockdevCreate} function where it specifies
6078
      whether we run on primary or not, and it affects both
6079
      the child assembly and the device's own Open() execution
6080

6081
  """
6082
  if device.CreateOnSecondary():
6083
    force_create = True
6084

    
6085
  if device.children:
6086
    for child in device.children:
6087
      _CreateBlockDev(lu, node, instance, child, force_create,
6088
                      info, force_open)
6089

    
6090
  if not force_create:
6091
    return
6092

    
6093
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6094

    
6095

    
6096
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6097
  """Create a single block device on a given node.
6098

6099
  This will not recurse over children of the device, so they must be
6100
  created in advance.
6101

6102
  @param lu: the lu on whose behalf we execute
6103
  @param node: the node on which to create the device
6104
  @type instance: L{objects.Instance}
6105
  @param instance: the instance which owns the device
6106
  @type device: L{objects.Disk}
6107
  @param device: the device to create
6108
  @param info: the extra 'metadata' we should attach to the device
6109
      (this will be represented as a LVM tag)
6110
  @type force_open: boolean
6111
  @param force_open: this parameter will be passed to the
6112
      L{backend.BlockdevCreate} function where it specifies
6113
      whether we run on primary or not, and it affects both
6114
      the child assembly and the device's own Open() execution
6115

6116
  """
6117
  lu.cfg.SetDiskID(device, node)
6118
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6119
                                       instance.name, force_open, info)
6120
  result.Raise("Can't create block device %s on"
6121
               " node %s for instance %s" % (device, node, instance.name))
6122
  if device.physical_id is None:
6123
    device.physical_id = result.payload
6124

    
6125
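# Illustrative sketch (hypothetical, not part of the original module): for a
# DRBD8 disk the device tree is drbd8 -> [data LV, meta LV], so
# _CreateBlockDev above recurses into the two LV children first and creates
# the DRBD device itself last.  The helper below, whose name
# _DemoCreationOrder is invented for this example, only computes that
# bottom-up order for a given disk object; it contacts no node and is not a
# replacement for _CreateBlockDev.
def _DemoCreationOrder(device, force_create=False, _order=None):
  """Return the creation order implied by _CreateBlockDev's recursion."""
  if _order is None:
    _order = []
  if device.CreateOnSecondary():
    force_create = True
  for child in device.children or []:
    _DemoCreationOrder(child, force_create, _order)
  if force_create:
    _order.append(device.iv_name or str(device.logical_id))
  return _order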

    
6126
def _GenerateUniqueNames(lu, exts):
6127
  """Generate a suitable LV name.
6128

6129
  This will generate one logical volume name for each of the given extensions.
6130

6131
  """
6132
  results = []
6133
  for val in exts:
6134
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6135
    results.append("%s%s" % (new_id, val))
6136
  return results
6137

    
6138

    
6139
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6140
                         p_minor, s_minor):
6141
  """Generate a drbd8 device complete with its children.
6142

6143
  """
6144
  port = lu.cfg.AllocatePort()
6145
  vgname = lu.cfg.GetVGName()
6146
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6147
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6148
                          logical_id=(vgname, names[0]))
6149
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6150
                          logical_id=(vgname, names[1]))
6151
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6152
                          logical_id=(primary, secondary, port,
6153
                                      p_minor, s_minor,
6154
                                      shared_secret),
6155
                          children=[dev_data, dev_meta],
6156
                          iv_name=iv_name)
6157
  return drbd_dev
6158

    
6159

    
6160
def _GenerateDiskTemplate(lu, template_name,
6161
                          instance_name, primary_node,
6162
                          secondary_nodes, disk_info,
6163
                          file_storage_dir, file_driver,
6164
                          base_index):
6165
  """Generate the entire disk layout for a given template type.
6166

6167
  """
6168
  #TODO: compute space requirements
6169

    
6170
  vgname = lu.cfg.GetVGName()
6171
  disk_count = len(disk_info)
6172
  disks = []
6173
  if template_name == constants.DT_DISKLESS:
6174
    pass
6175
  elif template_name == constants.DT_PLAIN:
6176
    if len(secondary_nodes) != 0:
6177
      raise errors.ProgrammerError("Wrong template configuration")
6178

    
6179
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6180
                                      for i in range(disk_count)])
6181
    for idx, disk in enumerate(disk_info):
6182
      disk_index = idx + base_index
6183
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6184
                              logical_id=(vgname, names[idx]),
6185
                              iv_name="disk/%d" % disk_index,
6186
                              mode=disk["mode"])
6187
      disks.append(disk_dev)
6188
  elif template_name == constants.DT_DRBD8:
6189
    if len(secondary_nodes) != 1:
6190
      raise errors.ProgrammerError("Wrong template configuration")
6191
    remote_node = secondary_nodes[0]
6192
    minors = lu.cfg.AllocateDRBDMinor(
6193
      [primary_node, remote_node] * len(disk_info), instance_name)
6194

    
6195
    names = []
6196
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6197
                                               for i in range(disk_count)]):
6198
      names.append(lv_prefix + "_data")
6199
      names.append(lv_prefix + "_meta")
6200
    for idx, disk in enumerate(disk_info):
6201
      disk_index = idx + base_index
6202
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6203
                                      disk["size"], names[idx*2:idx*2+2],
6204
                                      "disk/%d" % disk_index,
6205
                                      minors[idx*2], minors[idx*2+1])
6206
      disk_dev.mode = disk["mode"]
6207
      disks.append(disk_dev)
6208
  elif template_name == constants.DT_FILE:
6209
    if len(secondary_nodes) != 0:
6210
      raise errors.ProgrammerError("Wrong template configuration")
6211

    
6212
    _RequireFileStorage()
6213

    
6214
    for idx, disk in enumerate(disk_info):
6215
      disk_index = idx + base_index
6216
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6217
                              iv_name="disk/%d" % disk_index,
6218
                              logical_id=(file_driver,
6219
                                          "%s/disk%d" % (file_storage_dir,
6220
                                                         disk_index)),
6221
                              mode=disk["mode"])
6222
      disks.append(disk_dev)
6223
  else:
6224
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6225
  return disks
6226

    
6227
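# Worked example (hypothetical, not part of the original module): for a
# two-disk DRBD8 template, _GenerateDiskTemplate above allocates minors for
# [pnode, snode, pnode, snode] and generates two LV names per disk
# ("<prefix>_data", "<prefix>_meta"), then pairs them per disk index via
# names[idx*2:idx*2+2] and minors[idx*2], minors[idx*2+1].  The helper below,
# whose name _DemoDrbd8Pairing is invented for this example, merely
# reproduces that pairing arithmetic with fake names and minors.
def _DemoDrbd8Pairing(disk_count):
  """Return [(data_lv, meta_lv, p_minor, s_minor), ...] per disk index."""
  names = []
  for idx in range(disk_count):
    names.append("prefix%d.disk%d_data" % (idx, idx))
    names.append("prefix%d.disk%d_meta" % (idx, idx))
  minors = range(2 * disk_count)  # fake minors; real ones come from the config
  pairs = []
  for idx in range(disk_count):
    data_lv, meta_lv = names[idx * 2:idx * 2 + 2]
    pairs.append((data_lv, meta_lv, minors[idx * 2], minors[idx * 2 + 1]))
  return pairs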

    
6228
def _GetInstanceInfoText(instance):
6229
  """Compute that text that should be added to the disk's metadata.
6230

6231
  """
6232
  return "originstname+%s" % instance.name
6233

    
6234

    
6235
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6236
  """Create all disks for an instance.
6237

6238
  This abstracts away some work from AddInstance.
6239

6240
  @type lu: L{LogicalUnit}
6241
  @param lu: the logical unit on whose behalf we execute
6242
  @type instance: L{objects.Instance}
6243
  @param instance: the instance whose disks we should create
6244
  @type to_skip: list
6245
  @param to_skip: list of indices to skip
6246
  @type target_node: string
6247
  @param target_node: if passed, overrides the target node for creation
6248
  @rtype: boolean
6249
  @return: the success of the creation
6250

6251
  """
6252
  info = _GetInstanceInfoText(instance)
6253
  if target_node is None:
6254
    pnode = instance.primary_node
6255
    all_nodes = instance.all_nodes
6256
  else:
6257
    pnode = target_node
6258
    all_nodes = [pnode]
6259

    
6260
  if instance.disk_template == constants.DT_FILE:
6261
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6262
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6263

    
6264
    result.Raise("Failed to create directory '%s' on"
6265
                 " node %s" % (file_storage_dir, pnode))
6266

    
6267
  # Note: this needs to be kept in sync with adding of disks in
6268
  # LUSetInstanceParams
6269
  for idx, device in enumerate(instance.disks):
6270
    if to_skip and idx in to_skip:
6271
      continue
6272
    logging.info("Creating volume %s for instance %s",
6273
                 device.iv_name, instance.name)
6274
    #HARDCODE
6275
    for node in all_nodes:
6276
      f_create = node == pnode
6277
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6278

    
6279

    
6280
def _RemoveDisks(lu, instance, target_node=None):
6281
  """Remove all disks for an instance.
6282

6283
  This abstracts away some work from `AddInstance()` and
6284
  `RemoveInstance()`. Note that in case some of the devices couldn't
6285
  be removed, the removal will continue with the other ones (compare
6286
  with `_CreateDisks()`).
6287

6288
  @type lu: L{LogicalUnit}
6289
  @param lu: the logical unit on whose behalf we execute
6290
  @type instance: L{objects.Instance}
6291
  @param instance: the instance whose disks we should remove
6292
  @type target_node: string
6293
  @param target_node: used to override the node on which to remove the disks
6294
  @rtype: boolean
6295
  @return: the success of the removal
6296

6297
  """
6298
  logging.info("Removing block devices for instance %s", instance.name)
6299

    
6300
  all_result = True
6301
  for device in instance.disks:
6302
    if target_node:
6303
      edata = [(target_node, device)]
6304
    else:
6305
      edata = device.ComputeNodeTree(instance.primary_node)
6306
    for node, disk in edata:
6307
      lu.cfg.SetDiskID(disk, node)
6308
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6309
      if msg:
6310
        lu.LogWarning("Could not remove block device %s on node %s,"
6311
                      " continuing anyway: %s", device.iv_name, node, msg)
6312
        all_result = False
6313

    
6314
  if instance.disk_template == constants.DT_FILE:
6315
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6316
    if target_node:
6317
      tgt = target_node
6318
    else:
6319
      tgt = instance.primary_node
6320
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6321
    if result.fail_msg:
6322
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6323
                    file_storage_dir, instance.primary_node, result.fail_msg)
6324
      all_result = False
6325

    
6326
  return all_result
6327

    
6328

    
6329
def _ComputeDiskSize(disk_template, disks):
6330
  """Compute disk size requirements in the volume group
6331

6332
  """
6333
  # Required free disk space as a function of disk and swap space
6334
  req_size_dict = {
6335
    constants.DT_DISKLESS: None,
6336
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6337
    # 128 MB are added for drbd metadata for each disk
6338
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6339
    constants.DT_FILE: None,
6340
  }
6341

    
6342
  if disk_template not in req_size_dict:
6343
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6344
                                 " is unknown" %  disk_template)
6345

    
6346
  return req_size_dict[disk_template]
6347

    
6348
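# Worked example (not part of the original module): with the 128 MiB of DRBD
# metadata added per disk, a DRBD8 instance with disks of 10240 and 2048 MiB
# needs (10240 + 128) + (2048 + 128) = 12544 MiB in the volume group, while
# the same disks under DT_PLAIN need 12288 MiB and DT_FILE/DT_DISKLESS have
# no volume group requirement (None).  The tiny helper below, whose name
# _DemoDiskSizeExample is invented for this example, just evaluates that case
# through _ComputeDiskSize.
def _DemoDiskSizeExample():
  """Return the VG space (in MiB) needed for a sample two-disk DRBD8 setup."""
  disks = [{"size": 10240}, {"size": 2048}]
  return _ComputeDiskSize(constants.DT_DRBD8, disks)  # == 12544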

    
6349
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6350
  """Hypervisor parameter validation.
6351

6352
  This function abstracts the hypervisor parameter validation to be
6353
  used in both instance create and instance modify.
6354

6355
  @type lu: L{LogicalUnit}
6356
  @param lu: the logical unit for which we check
6357
  @type nodenames: list
6358
  @param nodenames: the list of nodes on which we should check
6359
  @type hvname: string
6360
  @param hvname: the name of the hypervisor we should use
6361
  @type hvparams: dict
6362
  @param hvparams: the parameters which we need to check
6363
  @raise errors.OpPrereqError: if the parameters are not valid
6364

6365
  """
6366
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6367
                                                  hvname,
6368
                                                  hvparams)
6369
  for node in nodenames:
6370
    info = hvinfo[node]
6371
    if info.offline:
6372
      continue
6373
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6374

    
6375

    
6376
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6377
  """OS parameters validation.
6378

6379
  @type lu: L{LogicalUnit}
6380
  @param lu: the logical unit for which we check
6381
  @type required: boolean
6382
  @param required: whether the validation should fail if the OS is not
6383
      found
6384
  @type nodenames: list
6385
  @param nodenames: the list of nodes on which we should check
6386
  @type osname: string
6387
  @param osname: the name of the OS we should use
6388
  @type osparams: dict
6389
  @param osparams: the parameters which we need to check
6390
  @raise errors.OpPrereqError: if the parameters are not valid
6391

6392
  """
6393
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6394
                                   [constants.OS_VALIDATE_PARAMETERS],
6395
                                   osparams)
6396
  for node, nres in result.items():
6397
    # we don't check for offline cases since this should be run only
6398
    # against the master node and/or an instance's nodes
6399
    nres.Raise("OS Parameters validation failed on node %s" % node)
6400
    if not nres.payload:
6401
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6402
                 osname, node)
6403

    
6404

    
6405
class LUCreateInstance(LogicalUnit):
6406
  """Create an instance.
6407

6408
  """
6409
  HPATH = "instance-add"
6410
  HTYPE = constants.HTYPE_INSTANCE
6411
  _OP_PARAMS = [
6412
    _PInstanceName,
6413
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6414
    ("start", True, _TBool),
6415
    ("wait_for_sync", True, _TBool),
6416
    ("ip_check", True, _TBool),
6417
    ("name_check", True, _TBool),
6418
    ("disks", _NoDefault, _TListOf(_TDict)),
6419
    ("nics", _NoDefault, _TListOf(_TDict)),
6420
    ("hvparams", _EmptyDict, _TDict),
6421
    ("beparams", _EmptyDict, _TDict),
6422
    ("osparams", _EmptyDict, _TDict),
6423
    ("no_install", None, _TMaybeBool),
6424
    ("os_type", None, _TMaybeString),
6425
    ("force_variant", False, _TBool),
6426
    ("source_handshake", None, _TOr(_TList, _TNone)),
6427
    ("source_x509_ca", None, _TOr(_TList, _TNone)),
6428
    ("source_instance_name", None, _TMaybeString),
6429
    ("src_node", None, _TMaybeString),
6430
    ("src_path", None, _TMaybeString),
6431
    ("pnode", None, _TMaybeString),
6432
    ("snode", None, _TMaybeString),
6433
    ("iallocator", None, _TMaybeString),
6434
    ("hypervisor", None, _TMaybeString),
6435
    ("disk_template", _NoDefault, _CheckDiskTemplate),
6436
    ("identify_defaults", False, _TBool),
6437
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6438
    ("file_storage_dir", None, _TMaybeString),
6439
    ("dry_run", False, _TBool),
6440
    ]
6441
  REQ_BGL = False
6442

    
6443
  def CheckArguments(self):
6444
    """Check arguments.
6445

6446
    """
6447
    # do not require name_check to ease forward/backward compatibility
6448
    # for tools
6449
    if self.op.no_install and self.op.start:
6450
      self.LogInfo("No-installation mode selected, disabling startup")
6451
      self.op.start = False
6452
    # validate/normalize the instance name
6453
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6454
    if self.op.ip_check and not self.op.name_check:
6455
      # TODO: make the ip check more flexible and not depend on the name check
6456
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6457
                                 errors.ECODE_INVAL)
6458

    
6459
    # check nics' parameter names
6460
    for nic in self.op.nics:
6461
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6462

    
6463
    # check disks. parameter names and consistent adopt/no-adopt strategy
6464
    has_adopt = has_no_adopt = False
6465
    for disk in self.op.disks:
6466
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6467
      if "adopt" in disk:
6468
        has_adopt = True
6469
      else:
6470
        has_no_adopt = True
6471
    if has_adopt and has_no_adopt:
6472
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6473
                                 errors.ECODE_INVAL)
6474
    if has_adopt:
6475
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6476
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6477
                                   " '%s' disk template" %
6478
                                   self.op.disk_template,
6479
                                   errors.ECODE_INVAL)
6480
      if self.op.iallocator is not None:
6481
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6482
                                   " iallocator script", errors.ECODE_INVAL)
6483
      if self.op.mode == constants.INSTANCE_IMPORT:
6484
        raise errors.OpPrereqError("Disk adoption not allowed for"
6485
                                   " instance import", errors.ECODE_INVAL)
6486

    
6487
    self.adopt_disks = has_adopt
6488

    
6489
    # instance name verification
6490
    if self.op.name_check:
6491
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6492
      self.op.instance_name = self.hostname1.name
6493
      # used in CheckPrereq for ip ping check
6494
      self.check_ip = self.hostname1.ip
6495
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6496
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6497
                                 errors.ECODE_INVAL)
6498
    else:
6499
      self.check_ip = None
6500

    
6501
    # file storage checks
6502
    if (self.op.file_driver and
6503
        self.op.file_driver not in constants.FILE_DRIVER):
6504
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6505
                                 self.op.file_driver, errors.ECODE_INVAL)
6506

    
6507
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6508
      raise errors.OpPrereqError("File storage directory path not absolute",
6509
                                 errors.ECODE_INVAL)
6510

    
6511
    ### Node/iallocator related checks
6512
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6513
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6514
                                 " node must be given",
6515
                                 errors.ECODE_INVAL)
6516

    
6517
    self._cds = _GetClusterDomainSecret()
6518

    
6519
    if self.op.mode == constants.INSTANCE_IMPORT:
6520
      # On import force_variant must be True, because if we forced it at
6521
      # initial install, our only chance when importing it back is that it
6522
      # works again!
6523
      self.op.force_variant = True
6524

    
6525
      if self.op.no_install:
6526
        self.LogInfo("No-installation mode has no effect during import")
6527

    
6528
    elif self.op.mode == constants.INSTANCE_CREATE:
6529
      if self.op.os_type is None:
6530
        raise errors.OpPrereqError("No guest OS specified",
6531
                                   errors.ECODE_INVAL)
6532
      if self.op.disk_template is None:
6533
        raise errors.OpPrereqError("No disk template specified",
6534
                                   errors.ECODE_INVAL)
6535

    
6536
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6537
      # Check handshake to ensure both clusters have the same domain secret
6538
      src_handshake = self.op.source_handshake
6539
      if not src_handshake:
6540
        raise errors.OpPrereqError("Missing source handshake",
6541
                                   errors.ECODE_INVAL)
6542

    
6543
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6544
                                                           src_handshake)
6545
      if errmsg:
6546
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6547
                                   errors.ECODE_INVAL)
6548

    
6549
      # Load and check source CA
6550
      self.source_x509_ca_pem = self.op.source_x509_ca
6551
      if not self.source_x509_ca_pem:
6552
        raise errors.OpPrereqError("Missing source X509 CA",
6553
                                   errors.ECODE_INVAL)
6554

    
6555
      try:
6556
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6557
                                                    self._cds)
6558
      except OpenSSL.crypto.Error, err:
6559
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6560
                                   (err, ), errors.ECODE_INVAL)
6561

    
6562
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6563
      if errcode is not None:
6564
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6565
                                   errors.ECODE_INVAL)
6566

    
6567
      self.source_x509_ca = cert
6568

    
6569
      src_instance_name = self.op.source_instance_name
6570
      if not src_instance_name:
6571
        raise errors.OpPrereqError("Missing source instance name",
6572
                                   errors.ECODE_INVAL)
6573

    
6574
      self.source_instance_name = \
6575
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6576

    
6577
    else:
6578
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6579
                                 self.op.mode, errors.ECODE_INVAL)
6580

    
6581
  def ExpandNames(self):
6582
    """ExpandNames for CreateInstance.
6583

6584
    Figure out the right locks for instance creation.
6585

6586
    """
6587
    self.needed_locks = {}
6588

    
6589
    instance_name = self.op.instance_name
6590
    # this is just a preventive check, but someone might still add this
6591
    # instance in the meantime, and creation will fail at lock-add time
6592
    if instance_name in self.cfg.GetInstanceList():
6593
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6594
                                 instance_name, errors.ECODE_EXISTS)
6595

    
6596
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6597

    
6598
    if self.op.iallocator:
6599
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6600
    else:
6601
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6602
      nodelist = [self.op.pnode]
6603
      if self.op.snode is not None:
6604
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6605
        nodelist.append(self.op.snode)
6606
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6607

    
6608
    # in case of import lock the source node too
6609
    if self.op.mode == constants.INSTANCE_IMPORT:
6610
      src_node = self.op.src_node
6611
      src_path = self.op.src_path
6612

    
6613
      if src_path is None:
6614
        self.op.src_path = src_path = self.op.instance_name
6615

    
6616
      if src_node is None:
6617
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6618
        self.op.src_node = None
6619
        if os.path.isabs(src_path):
6620
          raise errors.OpPrereqError("Importing an instance from an absolute"
6621
                                     " path requires a source node option.",
6622
                                     errors.ECODE_INVAL)
6623
      else:
6624
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6625
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6626
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6627
        if not os.path.isabs(src_path):
6628
          self.op.src_path = src_path = \
6629
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6630

    
6631
  def _RunAllocator(self):
6632
    """Run the allocator based on input opcode.
6633

6634
    """
6635
    nics = [n.ToDict() for n in self.nics]
6636
    ial = IAllocator(self.cfg, self.rpc,
6637
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6638
                     name=self.op.instance_name,
6639
                     disk_template=self.op.disk_template,
6640
                     tags=[],
6641
                     os=self.op.os_type,
6642
                     vcpus=self.be_full[constants.BE_VCPUS],
6643
                     mem_size=self.be_full[constants.BE_MEMORY],
6644
                     disks=self.disks,
6645
                     nics=nics,
6646
                     hypervisor=self.op.hypervisor,
6647
                     )
6648

    
6649
    ial.Run(self.op.iallocator)
6650

    
6651
    if not ial.success:
6652
      raise errors.OpPrereqError("Can't compute nodes using"
6653
                                 " iallocator '%s': %s" %
6654
                                 (self.op.iallocator, ial.info),
6655
                                 errors.ECODE_NORES)
6656
    if len(ial.result) != ial.required_nodes:
6657
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6658
                                 " of nodes (%s), required %s" %
6659
                                 (self.op.iallocator, len(ial.result),
6660
                                  ial.required_nodes), errors.ECODE_FAULT)
6661
    self.op.pnode = ial.result[0]
6662
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6663
                 self.op.instance_name, self.op.iallocator,
6664
                 utils.CommaJoin(ial.result))
6665
    if ial.required_nodes == 2:
6666
      self.op.snode = ial.result[1]
6667

    
6668
  def BuildHooksEnv(self):
6669
    """Build hooks env.
6670

6671
    This runs on master, primary and secondary nodes of the instance.
6672

6673
    """
6674
    env = {
6675
      "ADD_MODE": self.op.mode,
6676
      }
6677
    if self.op.mode == constants.INSTANCE_IMPORT:
6678
      env["SRC_NODE"] = self.op.src_node
6679
      env["SRC_PATH"] = self.op.src_path
6680
      env["SRC_IMAGES"] = self.src_images
6681

    
6682
    env.update(_BuildInstanceHookEnv(
6683
      name=self.op.instance_name,
6684
      primary_node=self.op.pnode,
6685
      secondary_nodes=self.secondaries,
6686
      status=self.op.start,
6687
      os_type=self.op.os_type,
6688
      memory=self.be_full[constants.BE_MEMORY],
6689
      vcpus=self.be_full[constants.BE_VCPUS],
6690
      nics=_NICListToTuple(self, self.nics),
6691
      disk_template=self.op.disk_template,
6692
      disks=[(d["size"], d["mode"]) for d in self.disks],
6693
      bep=self.be_full,
6694
      hvp=self.hv_full,
6695
      hypervisor_name=self.op.hypervisor,
6696
    ))
6697

    
6698
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6699
          self.secondaries)
6700
    return env, nl, nl
6701

    
6702
  def _ReadExportInfo(self):
6703
    """Reads the export information from disk.
6704

6705
    It will override the opcode source node and path with the actual
6706
    information, if these two were not specified before.
6707

6708
    @return: the export information
6709

6710
    """
6711
    assert self.op.mode == constants.INSTANCE_IMPORT
6712

    
6713
    src_node = self.op.src_node
6714
    src_path = self.op.src_path
6715

    
6716
    if src_node is None:
6717
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6718
      exp_list = self.rpc.call_export_list(locked_nodes)
6719
      found = False
6720
      for node in exp_list:
6721
        if exp_list[node].fail_msg:
6722
          continue
6723
        if src_path in exp_list[node].payload:
6724
          found = True
6725
          self.op.src_node = src_node = node
6726
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6727
                                                       src_path)
6728
          break
6729
      if not found:
6730
        raise errors.OpPrereqError("No export found for relative path %s" %
6731
                                    src_path, errors.ECODE_INVAL)
6732

    
6733
    _CheckNodeOnline(self, src_node)
6734
    result = self.rpc.call_export_info(src_node, src_path)
6735
    result.Raise("No export or invalid export found in dir %s" % src_path)
6736

    
6737
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6738
    if not export_info.has_section(constants.INISECT_EXP):
6739
      raise errors.ProgrammerError("Corrupted export config",
6740
                                   errors.ECODE_ENVIRON)
6741

    
6742
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6743
    if (int(ei_version) != constants.EXPORT_VERSION):
6744
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6745
                                 (ei_version, constants.EXPORT_VERSION),
6746
                                 errors.ECODE_ENVIRON)
6747
    return export_info
6748

    
6749
  def _ReadExportParams(self, einfo):
6750
    """Use export parameters as defaults.
6751

6752
    In case the opcode doesn't specify (as in override) some instance
6753
    parameters, then try to use them from the export information, if
6754
    that declares them.
6755

6756
    """
6757
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6758

    
6759
    if self.op.disk_template is None:
6760
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6761
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6762
                                          "disk_template")
6763
      else:
6764
        raise errors.OpPrereqError("No disk template specified and the export"
6765
                                   " is missing the disk_template information",
6766
                                   errors.ECODE_INVAL)
6767

    
6768
    if not self.op.disks:
6769
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6770
        disks = []
6771
        # TODO: import the disk iv_name too
6772
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6773
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6774
          disks.append({"size": disk_sz})
6775
        self.op.disks = disks
6776
      else:
6777
        raise errors.OpPrereqError("No disk info specified and the export"
6778
                                   " is missing the disk information",
6779
                                   errors.ECODE_INVAL)
6780

    
6781
    if (not self.op.nics and
6782
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6783
      nics = []
6784
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6785
        ndict = {}
6786
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6787
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6788
          ndict[name] = v
6789
        nics.append(ndict)
6790
      self.op.nics = nics
6791

    
6792
    if (self.op.hypervisor is None and
6793
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6794
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6795
    if einfo.has_section(constants.INISECT_HYP):
6796
      # use the export parameters but do not override the ones
6797
      # specified by the user
6798
      for name, value in einfo.items(constants.INISECT_HYP):
6799
        if name not in self.op.hvparams:
6800
          self.op.hvparams[name] = value
6801

    
6802
    if einfo.has_section(constants.INISECT_BEP):
6803
      # use the parameters, without overriding
6804
      for name, value in einfo.items(constants.INISECT_BEP):
6805
        if name not in self.op.beparams:
6806
          self.op.beparams[name] = value
6807
    else:
6808
      # try to read the parameters old style, from the main section
6809
      for name in constants.BES_PARAMETERS:
6810
        if (name not in self.op.beparams and
6811
            einfo.has_option(constants.INISECT_INS, name)):
6812
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6813

    
6814
    if einfo.has_section(constants.INISECT_OSP):
6815
      # use the parameters, without overriding
6816
      for name, value in einfo.items(constants.INISECT_OSP):
6817
        if name not in self.op.osparams:
6818
          self.op.osparams[name] = value
6819

    
6820
  def _RevertToDefaults(self, cluster):
6821
    """Revert the instance parameters to the default values.
6822

6823
    """
6824
    # hvparams
6825
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6826
    for name in self.op.hvparams.keys():
6827
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6828
        del self.op.hvparams[name]
6829
    # beparams
6830
    be_defs = cluster.SimpleFillBE({})
6831
    for name in self.op.beparams.keys():
6832
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6833
        del self.op.beparams[name]
6834
    # nic params
6835
    nic_defs = cluster.SimpleFillNIC({})
6836
    for nic in self.op.nics:
6837
      for name in constants.NICS_PARAMETERS:
6838
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6839
          del nic[name]
6840
    # osparams
6841
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6842
    for name in self.op.osparams.keys():
6843
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6844
        del self.op.osparams[name]
6845

    
6846
  def CheckPrereq(self):
6847
    """Check prerequisites.
6848

6849
    """
6850
    if self.op.mode == constants.INSTANCE_IMPORT:
6851
      export_info = self._ReadExportInfo()
6852
      self._ReadExportParams(export_info)
6853

    
6854
    _CheckDiskTemplate(self.op.disk_template)
6855

    
6856
    if (not self.cfg.GetVGName() and
6857
        self.op.disk_template not in constants.DTS_NOT_LVM):
6858
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6859
                                 " instances", errors.ECODE_STATE)
6860

    
6861
    if self.op.hypervisor is None:
6862
      self.op.hypervisor = self.cfg.GetHypervisorType()
6863

    
6864
    cluster = self.cfg.GetClusterInfo()
6865
    enabled_hvs = cluster.enabled_hypervisors
6866
    if self.op.hypervisor not in enabled_hvs:
6867
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6868
                                 " cluster (%s)" % (self.op.hypervisor,
6869
                                  ",".join(enabled_hvs)),
6870
                                 errors.ECODE_STATE)
6871

    
6872
    # check hypervisor parameter syntax (locally)
6873
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6874
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6875
                                      self.op.hvparams)
6876
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6877
    hv_type.CheckParameterSyntax(filled_hvp)
6878
    self.hv_full = filled_hvp
6879
    # check that we don't specify global parameters on an instance
6880
    _CheckGlobalHvParams(self.op.hvparams)
6881

    
6882
    # fill and remember the beparams dict
6883
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6884
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6885

    
6886
    # build os parameters
6887
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6888

    
6889
    # now that hvp/bep are in final format, let's reset to defaults,
6890
    # if told to do so
6891
    if self.op.identify_defaults:
6892
      self._RevertToDefaults(cluster)
6893

    
6894
    # NIC buildup
6895
    self.nics = []
6896
    for idx, nic in enumerate(self.op.nics):
6897
      nic_mode_req = nic.get("mode", None)
6898
      nic_mode = nic_mode_req
6899
      if nic_mode is None:
6900
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6901

    
6902
      # in routed mode, for the first nic, the default ip is 'auto'
6903
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6904
        default_ip_mode = constants.VALUE_AUTO
6905
      else:
6906
        default_ip_mode = constants.VALUE_NONE
6907

    
6908
      # ip validity checks
6909
      ip = nic.get("ip", default_ip_mode)
6910
      if ip is None or ip.lower() == constants.VALUE_NONE:
6911
        nic_ip = None
6912
      elif ip.lower() == constants.VALUE_AUTO:
6913
        if not self.op.name_check:
6914
          raise errors.OpPrereqError("IP address set to auto but name checks"
6915
                                     " have been skipped. Aborting.",
6916
                                     errors.ECODE_INVAL)
6917
        nic_ip = self.hostname1.ip
6918
      else:
6919
        if not utils.IsValidIP4(ip):
6920
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6921
                                     " like a valid IP" % ip,
6922
                                     errors.ECODE_INVAL)
6923
        nic_ip = ip
6924

    
6925
      # TODO: check the ip address for uniqueness
6926
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6927
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6928
                                   errors.ECODE_INVAL)
6929

    
6930
      # MAC address verification
6931
      mac = nic.get("mac", constants.VALUE_AUTO)
6932
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6933
        mac = utils.NormalizeAndValidateMac(mac)
6934

    
6935
        try:
6936
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6937
        except errors.ReservationError:
6938
          raise errors.OpPrereqError("MAC address %s already in use"
6939
                                     " in cluster" % mac,
6940
                                     errors.ECODE_NOTUNIQUE)
6941

    
6942
      # bridge verification
6943
      bridge = nic.get("bridge", None)
6944
      link = nic.get("link", None)
6945
      if bridge and link:
6946
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6947
                                   " at the same time", errors.ECODE_INVAL)
6948
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6949
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6950
                                   errors.ECODE_INVAL)
6951
      elif bridge:
6952
        link = bridge
6953

    
6954
      nicparams = {}
6955
      if nic_mode_req:
6956
        nicparams[constants.NIC_MODE] = nic_mode_req
6957
      if link:
6958
        nicparams[constants.NIC_LINK] = link
6959

    
6960
      check_params = cluster.SimpleFillNIC(nicparams)
6961
      objects.NIC.CheckParameterSyntax(check_params)
6962
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6963

    
6964
    # disk checks/pre-build
6965
    self.disks = []
6966
    for disk in self.op.disks:
6967
      mode = disk.get("mode", constants.DISK_RDWR)
6968
      if mode not in constants.DISK_ACCESS_SET:
6969
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6970
                                   mode, errors.ECODE_INVAL)
6971
      size = disk.get("size", None)
6972
      if size is None:
6973
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6974
      try:
6975
        size = int(size)
6976
      except (TypeError, ValueError):
6977
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6978
                                   errors.ECODE_INVAL)
6979
      new_disk = {"size": size, "mode": mode}
6980
      if "adopt" in disk:
6981
        new_disk["adopt"] = disk["adopt"]
6982
      self.disks.append(new_disk)
6983

    
6984
    if self.op.mode == constants.INSTANCE_IMPORT:
6985

    
6986
      # Check that the new instance doesn't have less disks than the export
6987
      instance_disks = len(self.disks)
6988
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6989
      if instance_disks < export_disks:
6990
        raise errors.OpPrereqError("Not enough disks to import."
6991
                                   " (instance: %d, export: %d)" %
6992
                                   (instance_disks, export_disks),
6993
                                   errors.ECODE_INVAL)
6994

    
6995
      disk_images = []
6996
      for idx in range(export_disks):
6997
        option = 'disk%d_dump' % idx
6998
        if export_info.has_option(constants.INISECT_INS, option):
6999
          # FIXME: are the old os-es, disk sizes, etc. useful?
7000
          export_name = export_info.get(constants.INISECT_INS, option)
7001
          image = utils.PathJoin(self.op.src_path, export_name)
7002
          disk_images.append(image)
7003
        else:
7004
          disk_images.append(False)
7005

    
7006
      self.src_images = disk_images
7007

    
7008
      old_name = export_info.get(constants.INISECT_INS, 'name')
7009
      try:
7010
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7011
      except (TypeError, ValueError), err:
7012
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7013
                                   " an integer: %s" % str(err),
7014
                                   errors.ECODE_STATE)
7015
      if self.op.instance_name == old_name:
7016
        for idx, nic in enumerate(self.nics):
7017
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7018
            nic_mac_ini = 'nic%d_mac' % idx
7019
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7020

    
7021
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7022

    
7023
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7024
    if self.op.ip_check:
7025
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7026
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7027
                                   (self.check_ip, self.op.instance_name),
7028
                                   errors.ECODE_NOTUNIQUE)
7029

    
7030
    #### mac address generation
7031
    # By generating here the mac address both the allocator and the hooks get
7032
    # the real final mac address rather than the 'auto' or 'generate' value.
7033
    # There is a race condition between the generation and the instance object
7034
    # creation, which means that we know the mac is valid now, but we're not
7035
    # sure it will be when we actually add the instance. If things go bad
7036
    # adding the instance will abort because of a duplicate mac, and the
7037
    # creation job will fail.
7038
    for nic in self.nics:
7039
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7040
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
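    # (Added note) after this loop every NIC has a concrete MAC; as explained
    # above, a duplicate can only surface later, when the instance object is
    # actually added to the configuration.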
7041

    
7042
    #### allocator run
7043

    
7044
    if self.op.iallocator is not None:
7045
      self._RunAllocator()
7046

    
7047
    #### node related checks
7048

    
7049
    # check primary node
7050
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7051
    assert self.pnode is not None, \
7052
      "Cannot retrieve locked node %s" % self.op.pnode
7053
    if pnode.offline:
7054
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7055
                                 pnode.name, errors.ECODE_STATE)
7056
    if pnode.drained:
7057
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7058
                                 pnode.name, errors.ECODE_STATE)
7059

    
7060
    self.secondaries = []
7061

    
7062
    # mirror node verification
7063
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7064
      if self.op.snode is None:
7065
        raise errors.OpPrereqError("The networked disk templates need"
7066
                                   " a mirror node", errors.ECODE_INVAL)
7067
      if self.op.snode == pnode.name:
7068
        raise errors.OpPrereqError("The secondary node cannot be the"
7069
                                   " primary node.", errors.ECODE_INVAL)
7070
      _CheckNodeOnline(self, self.op.snode)
7071
      _CheckNodeNotDrained(self, self.op.snode)
7072
      self.secondaries.append(self.op.snode)
7073

    
7074
    nodenames = [pnode.name] + self.secondaries
7075

    
7076
    req_size = _ComputeDiskSize(self.op.disk_template,
7077
                                self.disks)
7078

    
7079
    # Check lv size requirements, if not adopting
7080
    if req_size is not None and not self.adopt_disks:
7081
      _CheckNodesFreeDisk(self, nodenames, req_size)
7082

    
7083
    if self.adopt_disks: # instead, we must check the adoption data
7084
      all_lvs = set([i["adopt"] for i in self.disks])
7085
      if len(all_lvs) != len(self.disks):
7086
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7087
                                   errors.ECODE_INVAL)
7088
      for lv_name in all_lvs:
7089
        try:
7090
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7091
        except errors.ReservationError:
7092
          raise errors.OpPrereqError("LV named %s used by another instance" %
7093
                                     lv_name, errors.ECODE_NOTUNIQUE)
7094

    
7095
      node_lvs = self.rpc.call_lv_list([pnode.name],
7096
                                       self.cfg.GetVGName())[pnode.name]
7097
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7098
      node_lvs = node_lvs.payload
7099
      delta = all_lvs.difference(node_lvs.keys())
7100
      if delta:
7101
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7102
                                   utils.CommaJoin(delta),
7103
                                   errors.ECODE_INVAL)
7104
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7105
      if online_lvs:
7106
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7107
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7108
                                   errors.ECODE_STATE)
7109
      # update the size of disk based on what is found
7110
      for dsk in self.disks:
7111
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7112

    
7113
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7114

    
7115
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7116
    # check OS parameters (remotely)
7117
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7118

    
7119
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7120

    
7121
    # memory check on primary node
7122
    if self.op.start:
7123
      _CheckNodeFreeMemory(self, self.pnode.name,
7124
                           "creating instance %s" % self.op.instance_name,
7125
                           self.be_full[constants.BE_MEMORY],
7126
                           self.op.hypervisor)
7127

    
7128
    self.dry_run_result = list(nodenames)
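    # (Added note) dry_run_result is what the job returns when the opcode is
    # submitted in dry-run mode: the list of nodes the instance would use.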
7129

    
7130
  def Exec(self, feedback_fn):
7131
    """Create and add the instance to the cluster.
7132

7133
    """
7134
    instance = self.op.instance_name
7135
    pnode_name = self.pnode.name
7136

    
7137
    ht_kind = self.op.hypervisor
7138
    if ht_kind in constants.HTS_REQ_PORT:
7139
      network_port = self.cfg.AllocatePort()
7140
    else:
7141
      network_port = None
7142

    
7143
    if constants.ENABLE_FILE_STORAGE:
7144
      # this is needed because os.path.join does not accept None arguments
7145
      if self.op.file_storage_dir is None:
7146
        string_file_storage_dir = ""
7147
      else:
7148
        string_file_storage_dir = self.op.file_storage_dir
7149

    
7150
      # build the full file storage dir path
7151
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7152
                                        string_file_storage_dir, instance)
7153
    else:
7154
      file_storage_dir = ""
7155

    
7156
    disks = _GenerateDiskTemplate(self,
7157
                                  self.op.disk_template,
7158
                                  instance, pnode_name,
7159
                                  self.secondaries,
7160
                                  self.disks,
7161
                                  file_storage_dir,
7162
                                  self.op.file_driver,
7163
                                  0)
7164

    
7165
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7166
                            primary_node=pnode_name,
7167
                            nics=self.nics, disks=disks,
7168
                            disk_template=self.op.disk_template,
7169
                            admin_up=False,
7170
                            network_port=network_port,
7171
                            beparams=self.op.beparams,
7172
                            hvparams=self.op.hvparams,
7173
                            hypervisor=self.op.hypervisor,
7174
                            osparams=self.op.osparams,
7175
                            )
7176

    
7177
    if self.adopt_disks:
7178
      # rename LVs to the newly-generated names; we need to construct
7179
      # 'fake' LV disks with the old data, plus the new unique_id
7180
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7181
      rename_to = []
7182
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7183
        rename_to.append(t_dsk.logical_id)
7184
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7185
        self.cfg.SetDiskID(t_dsk, pnode_name)
7186
      result = self.rpc.call_blockdev_rename(pnode_name,
7187
                                             zip(tmp_disks, rename_to))
7188
      result.Raise("Failed to rename adoped LVs")
7189
    else:
7190
      feedback_fn("* creating instance disks...")
7191
      try:
7192
        _CreateDisks(self, iobj)
7193
      except errors.OpExecError:
7194
        self.LogWarning("Device creation failed, reverting...")
7195
        try:
7196
          _RemoveDisks(self, iobj)
7197
        finally:
7198
          self.cfg.ReleaseDRBDMinors(instance)
7199
          raise
7200

    
7201
    feedback_fn("adding instance %s to cluster config" % instance)
7202

    
7203
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7204

    
7205
    # Declare that we don't want to remove the instance lock anymore, as we've
7206
    # added the instance to the config
7207
    del self.remove_locks[locking.LEVEL_INSTANCE]
7208
    # Unlock all the nodes
7209
    if self.op.mode == constants.INSTANCE_IMPORT:
7210
      nodes_keep = [self.op.src_node]
7211
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7212
                       if node != self.op.src_node]
7213
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7214
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7215
    else:
7216
      self.context.glm.release(locking.LEVEL_NODE)
7217
      del self.acquired_locks[locking.LEVEL_NODE]
7218

    
7219
    if self.op.wait_for_sync:
7220
      disk_abort = not _WaitForSync(self, iobj)
7221
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7222
      # make sure the disks are not degraded (still sync-ing is ok)
7223
      time.sleep(15)
7224
      feedback_fn("* checking mirrors status")
7225
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7226
    else:
7227
      disk_abort = False
7228

    
7229
    if disk_abort:
7230
      _RemoveDisks(self, iobj)
7231
      self.cfg.RemoveInstance(iobj.name)
7232
      # Make sure the instance lock gets removed
7233
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7234
      raise errors.OpExecError("There are some degraded disks for"
7235
                               " this instance")
7236

    
7237
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7238
      if self.op.mode == constants.INSTANCE_CREATE:
7239
        if not self.op.no_install:
7240
          feedback_fn("* running the instance OS create scripts...")
7241
          # FIXME: pass debug option from opcode to backend
7242
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7243
                                                 self.op.debug_level)
7244
          result.Raise("Could not add os for instance %s"
7245
                       " on node %s" % (instance, pnode_name))
7246

    
7247
      elif self.op.mode == constants.INSTANCE_IMPORT:
7248
        feedback_fn("* running the instance OS import scripts...")
7249

    
7250
        transfers = []
7251

    
7252
        for idx, image in enumerate(self.src_images):
7253
          if not image:
7254
            continue
7255

    
7256
          # FIXME: pass debug option from opcode to backend
7257
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7258
                                             constants.IEIO_FILE, (image, ),
7259
                                             constants.IEIO_SCRIPT,
7260
                                             (iobj.disks[idx], idx),
7261
                                             None)
7262
          transfers.append(dt)
7263

    
7264
        import_result = \
7265
          masterd.instance.TransferInstanceData(self, feedback_fn,
7266
                                                self.op.src_node, pnode_name,
7267
                                                self.pnode.secondary_ip,
7268
                                                iobj, transfers)
7269
        if not compat.all(import_result):
7270
          self.LogWarning("Some disks for instance %s on node %s were not"
7271
                          " imported successfully" % (instance, pnode_name))
7272

    
7273
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7274
        feedback_fn("* preparing remote import...")
7275
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7276
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7277

    
7278
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7279
                                                     self.source_x509_ca,
7280
                                                     self._cds, timeouts)
7281
        if not compat.all(disk_results):
7282
          # TODO: Should the instance still be started, even if some disks
7283
          # failed to import (valid for local imports, too)?
7284
          self.LogWarning("Some disks for instance %s on node %s were not"
7285
                          " imported successfully" % (instance, pnode_name))
7286

    
7287
        # Run rename script on newly imported instance
7288
        assert iobj.name == instance
7289
        feedback_fn("Running rename script for %s" % instance)
7290
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7291
                                                   self.source_instance_name,
7292
                                                   self.op.debug_level)
7293
        if result.fail_msg:
7294
          self.LogWarning("Failed to run rename script for %s on node"
7295
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7296

    
7297
      else:
7298
        # also checked in the prereq part
7299
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7300
                                     % self.op.mode)
7301

    
7302
    if self.op.start:
7303
      iobj.admin_up = True
7304
      self.cfg.Update(iobj, feedback_fn)
7305
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7306
      feedback_fn("* starting instance...")
7307
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7308
      result.Raise("Could not start instance")
7309

    
7310
    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
7314
  """Connect to an instance's console.
7315

7316
  This is somewhat special in that it returns the command line that
7317
  you need to run on the master node in order to connect to the
7318
  console.
7319

7320
  """
7321
  _OP_PARAMS = [
7322
    _PInstanceName
7323
    ]
7324
  REQ_BGL = False
7325

    
7326
  def ExpandNames(self):
7327
    self._ExpandAndLockInstance()
7328

    
7329
  def CheckPrereq(self):
7330
    """Check prerequisites.
7331

7332
    This checks that the instance is in the cluster.
7333

7334
    """
7335
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7336
    assert self.instance is not None, \
7337
      "Cannot retrieve locked instance %s" % self.op.instance_name
7338
    _CheckNodeOnline(self, self.instance.primary_node)
7339

    
7340
  def Exec(self, feedback_fn):
7341
    """Connect to the console of an instance
7342

7343
    """
7344
    instance = self.instance
7345
    node = instance.primary_node
7346

    
7347
    node_insts = self.rpc.call_instance_list([node],
7348
                                             [instance.hypervisor])[node]
7349
    node_insts.Raise("Can't get node information from %s" % node)
7350

    
7351
    if instance.name not in node_insts.payload:
7352
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7353

    
7354
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7355

    
7356
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7357
    cluster = self.cfg.GetClusterInfo()
7358
    # beparams and hvparams are passed separately, to avoid editing the
7359
    # instance and then saving the defaults in the instance itself.
7360
    hvparams = cluster.FillHV(instance)
7361
    beparams = cluster.FillBE(instance)
7362
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7363

    
7364
    # build ssh cmdline
7365
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
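    # (Added note) the returned value is the argv of an ssh invocation; as the
    # class docstring says, the client is expected to run it on the master
    # node rather than this LU opening the console itself.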


class LUReplaceDisks(LogicalUnit):
7369
  """Replace the disks of an instance.
7370

7371
  """
7372
  HPATH = "mirrors-replace"
7373
  HTYPE = constants.HTYPE_INSTANCE
7374
  _OP_PARAMS = [
7375
    _PInstanceName,
7376
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7377
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7378
    ("remote_node", None, _TMaybeString),
7379
    ("iallocator", None, _TMaybeString),
7380
    ("early_release", False, _TBool),
7381
    ]
7382
  REQ_BGL = False
7383

    
7384
  def CheckArguments(self):
7385
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7386
                                  self.op.iallocator)
7387

    
7388
  def ExpandNames(self):
7389
    self._ExpandAndLockInstance()
7390

    
7391
    if self.op.iallocator is not None:
7392
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7393

    
7394
    elif self.op.remote_node is not None:
7395
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7396
      self.op.remote_node = remote_node
7397

    
7398
      # Warning: do not remove the locking of the new secondary here
7399
      # unless DRBD8.AddChildren is changed to work in parallel;
7400
      # currently it doesn't since parallel invocations of
7401
      # FindUnusedMinor will conflict
7402
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7403
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7404

    
7405
    else:
7406
      self.needed_locks[locking.LEVEL_NODE] = []
7407
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7408

    
7409
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7410
                                   self.op.iallocator, self.op.remote_node,
7411
                                   self.op.disks, False, self.op.early_release)
7412

    
7413
    self.tasklets = [self.replacer]
7414

    
7415
  def DeclareLocks(self, level):
7416
    # If we're not already locking all nodes in the set we have to declare the
7417
    # instance's primary/secondary nodes.
7418
    if (level == locking.LEVEL_NODE and
7419
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7420
      self._LockInstancesNodes()
7421

    
7422
  def BuildHooksEnv(self):
7423
    """Build hooks env.
7424

7425
    This runs on the master, the primary and all the secondaries.
7426

7427
    """
7428
    instance = self.replacer.instance
7429
    env = {
7430
      "MODE": self.op.mode,
7431
      "NEW_SECONDARY": self.op.remote_node,
7432
      "OLD_SECONDARY": instance.secondary_nodes[0],
7433
      }
7434
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7435
    nl = [
7436
      self.cfg.GetMasterNode(),
7437
      instance.primary_node,
7438
      ]
7439
    if self.op.remote_node is not None:
7440
      nl.append(self.op.remote_node)
7441
    return env, nl, nl


class TLReplaceDisks(Tasklet):
7445
  """Replaces disks for an instance.
7446

7447
  Note: Locking is not within the scope of this class.
7448

7449
  """
7450
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7451
               disks, delay_iallocator, early_release):
7452
    """Initializes this class.
7453

7454
    """
7455
    Tasklet.__init__(self, lu)
7456

    
7457
    # Parameters
7458
    self.instance_name = instance_name
7459
    self.mode = mode
7460
    self.iallocator_name = iallocator_name
7461
    self.remote_node = remote_node
7462
    self.disks = disks
7463
    self.delay_iallocator = delay_iallocator
7464
    self.early_release = early_release
7465

    
7466
    # Runtime data
7467
    self.instance = None
7468
    self.new_node = None
7469
    self.target_node = None
7470
    self.other_node = None
7471
    self.remote_node_info = None
7472
    self.node_secondary_ip = None
7473

    
7474
  @staticmethod
7475
  def CheckArguments(mode, remote_node, iallocator):
7476
    """Helper function for users of this class.
7477

7478
    """
7479
    # check for valid parameter combination
7480
    if mode == constants.REPLACE_DISK_CHG:
7481
      if remote_node is None and iallocator is None:
7482
        raise errors.OpPrereqError("When changing the secondary either an"
7483
                                   " iallocator script must be used or the"
7484
                                   " new node given", errors.ECODE_INVAL)
7485

    
7486
      if remote_node is not None and iallocator is not None:
7487
        raise errors.OpPrereqError("Give either the iallocator or the new"
7488
                                   " secondary, not both", errors.ECODE_INVAL)
7489

    
7490
    elif remote_node is not None or iallocator is not None:
7491
      # Not replacing the secondary
7492
      raise errors.OpPrereqError("The iallocator and new node options can"
7493
                                 " only be used when changing the"
7494
                                 " secondary node", errors.ECODE_INVAL)
7495

    
7496
  @staticmethod
7497
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7498
    """Compute a new secondary node using an IAllocator.
7499

7500
    """
7501
    ial = IAllocator(lu.cfg, lu.rpc,
7502
                     mode=constants.IALLOCATOR_MODE_RELOC,
7503
                     name=instance_name,
7504
                     relocate_from=relocate_from)
7505

    
7506
    ial.Run(iallocator_name)
7507

    
7508
    if not ial.success:
7509
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7510
                                 " %s" % (iallocator_name, ial.info),
7511
                                 errors.ECODE_NORES)
7512

    
7513
    if len(ial.result) != ial.required_nodes:
7514
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7515
                                 " of nodes (%s), required %s" %
7516
                                 (iallocator_name,
7517
                                  len(ial.result), ial.required_nodes),
7518
                                 errors.ECODE_FAULT)
7519

    
7520
    remote_node_name = ial.result[0]
7521

    
7522
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7523
               instance_name, remote_node_name)
7524

    
7525
    return remote_node_name
7526

    
7527
  def _FindFaultyDisks(self, node_name):
7528
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7529
                                    node_name, True)
7530

    
7531
  def CheckPrereq(self):
7532
    """Check prerequisites.
7533

7534
    This checks that the instance is in the cluster.
7535

7536
    """
7537
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7538
    assert instance is not None, \
7539
      "Cannot retrieve locked instance %s" % self.instance_name
7540

    
7541
    if instance.disk_template != constants.DT_DRBD8:
7542
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7543
                                 " instances", errors.ECODE_INVAL)
7544

    
7545
    if len(instance.secondary_nodes) != 1:
7546
      raise errors.OpPrereqError("The instance has a strange layout,"
7547
                                 " expected one secondary but found %d" %
7548
                                 len(instance.secondary_nodes),
7549
                                 errors.ECODE_FAULT)
7550

    
7551
    if not self.delay_iallocator:
7552
      self._CheckPrereq2()
7553

    
7554
  def _CheckPrereq2(self):
7555
    """Check prerequisites, second part.
7556

7557
    This function should always be part of CheckPrereq. It was separated and is
7558
    now called from Exec because during node evacuation iallocator was only
7559
    called with an unmodified cluster model, not taking planned changes into
7560
    account.
7561

7562
    """
7563
    instance = self.instance
7564
    secondary_node = instance.secondary_nodes[0]
7565

    
7566
    if self.iallocator_name is None:
7567
      remote_node = self.remote_node
7568
    else:
7569
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7570
                                       instance.name, instance.secondary_nodes)
7571

    
7572
    if remote_node is not None:
7573
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7574
      assert self.remote_node_info is not None, \
7575
        "Cannot retrieve locked node %s" % remote_node
7576
    else:
7577
      self.remote_node_info = None
7578

    
7579
    if remote_node == self.instance.primary_node:
7580
      raise errors.OpPrereqError("The specified node is the primary node of"
7581
                                 " the instance.", errors.ECODE_INVAL)
7582

    
7583
    if remote_node == secondary_node:
7584
      raise errors.OpPrereqError("The specified node is already the"
7585
                                 " secondary node of the instance.",
7586
                                 errors.ECODE_INVAL)
7587

    
7588
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7589
                                    constants.REPLACE_DISK_CHG):
7590
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7591
                                 errors.ECODE_INVAL)
7592

    
7593
    if self.mode == constants.REPLACE_DISK_AUTO:
7594
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7595
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7596

    
7597
      if faulty_primary and faulty_secondary:
7598
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7599
                                   " one node and can not be repaired"
7600
                                   " automatically" % self.instance_name,
7601
                                   errors.ECODE_STATE)
7602

    
7603
      if faulty_primary:
7604
        self.disks = faulty_primary
7605
        self.target_node = instance.primary_node
7606
        self.other_node = secondary_node
7607
        check_nodes = [self.target_node, self.other_node]
7608
      elif faulty_secondary:
7609
        self.disks = faulty_secondary
7610
        self.target_node = secondary_node
7611
        self.other_node = instance.primary_node
7612
        check_nodes = [self.target_node, self.other_node]
7613
      else:
7614
        self.disks = []
7615
        check_nodes = []
7616

    
7617
    else:
7618
      # Non-automatic modes
7619
      if self.mode == constants.REPLACE_DISK_PRI:
7620
        self.target_node = instance.primary_node
7621
        self.other_node = secondary_node
7622
        check_nodes = [self.target_node, self.other_node]
7623

    
7624
      elif self.mode == constants.REPLACE_DISK_SEC:
7625
        self.target_node = secondary_node
7626
        self.other_node = instance.primary_node
7627
        check_nodes = [self.target_node, self.other_node]
7628

    
7629
      elif self.mode == constants.REPLACE_DISK_CHG:
7630
        self.new_node = remote_node
7631
        self.other_node = instance.primary_node
7632
        self.target_node = secondary_node
7633
        check_nodes = [self.new_node, self.other_node]
7634

    
7635
        _CheckNodeNotDrained(self.lu, remote_node)
7636

    
7637
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7638
        assert old_node_info is not None
7639
        if old_node_info.offline and not self.early_release:
7640
          # doesn't make sense to delay the release
7641
          self.early_release = True
7642
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7643
                          " early-release mode", secondary_node)
7644

    
7645
      else:
7646
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7647
                                     self.mode)
7648

    
7649
      # If not specified all disks should be replaced
7650
      if not self.disks:
7651
        self.disks = range(len(self.instance.disks))
7652

    
7653
    for node in check_nodes:
7654
      _CheckNodeOnline(self.lu, node)
7655

    
7656
    # Check whether disks are valid
7657
    for disk_idx in self.disks:
7658
      instance.FindDisk(disk_idx)
7659

    
7660
    # Get secondary node IP addresses
7661
    node_2nd_ip = {}
7662

    
7663
    for node_name in [self.target_node, self.other_node, self.new_node]:
7664
      if node_name is not None:
7665
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7666

    
7667
    self.node_secondary_ip = node_2nd_ip
7668

    
7669
  def Exec(self, feedback_fn):
7670
    """Execute disk replacement.
7671

7672
    This dispatches the disk replacement to the appropriate handler.
7673

7674
    """
7675
    if self.delay_iallocator:
7676
      self._CheckPrereq2()
7677

    
7678
    if not self.disks:
7679
      feedback_fn("No disks need replacement")
7680
      return
7681

    
7682
    feedback_fn("Replacing disk(s) %s for %s" %
7683
                (utils.CommaJoin(self.disks), self.instance.name))
7684

    
7685
    activate_disks = (not self.instance.admin_up)
7686

    
7687
    # Activate the instance disks if we're replacing them on a down instance
7688
    if activate_disks:
7689
      _StartInstanceDisks(self.lu, self.instance, True)
7690

    
7691
    try:
7692
      # Should we replace the secondary node?
7693
      if self.new_node is not None:
7694
        fn = self._ExecDrbd8Secondary
7695
      else:
7696
        fn = self._ExecDrbd8DiskOnly
7697

    
7698
      return fn(feedback_fn)
7699

    
7700
    finally:
7701
      # Deactivate the instance disks if we're replacing them on a
7702
      # down instance
7703
      if activate_disks:
7704
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7705

    
7706
  def _CheckVolumeGroup(self, nodes):
7707
    self.lu.LogInfo("Checking volume groups")
7708

    
7709
    vgname = self.cfg.GetVGName()
7710

    
7711
    # Make sure volume group exists on all involved nodes
7712
    results = self.rpc.call_vg_list(nodes)
7713
    if not results:
7714
      raise errors.OpExecError("Can't list volume groups on the nodes")
7715

    
7716
    for node in nodes:
7717
      res = results[node]
7718
      res.Raise("Error checking node %s" % node)
7719
      if vgname not in res.payload:
7720
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7721
                                 (vgname, node))
7722

    
7723
  def _CheckDisksExistence(self, nodes):
7724
    # Check disk existence
7725
    for idx, dev in enumerate(self.instance.disks):
7726
      if idx not in self.disks:
7727
        continue
7728

    
7729
      for node in nodes:
7730
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7731
        self.cfg.SetDiskID(dev, node)
7732

    
7733
        result = self.rpc.call_blockdev_find(node, dev)
7734

    
7735
        msg = result.fail_msg
7736
        if msg or not result.payload:
7737
          if not msg:
7738
            msg = "disk not found"
7739
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7740
                                   (idx, node, msg))
7741

    
7742
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7743
    for idx, dev in enumerate(self.instance.disks):
7744
      if idx not in self.disks:
7745
        continue
7746

    
7747
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7748
                      (idx, node_name))
7749

    
7750
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7751
                                   ldisk=ldisk):
7752
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7753
                                 " replace disks for instance %s" %
7754
                                 (node_name, self.instance.name))
7755

    
7756
  def _CreateNewStorage(self, node_name):
7757
    vgname = self.cfg.GetVGName()
7758
    iv_names = {}
7759

    
7760
    for idx, dev in enumerate(self.instance.disks):
7761
      if idx not in self.disks:
7762
        continue
7763

    
7764
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7765

    
7766
      self.cfg.SetDiskID(dev, node_name)
7767

    
7768
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7769
      names = _GenerateUniqueNames(self.lu, lv_names)
7770

    
7771
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7772
                             logical_id=(vgname, names[0]))
7773
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7774
                             logical_id=(vgname, names[1]))
7775

    
7776
      new_lvs = [lv_data, lv_meta]
7777
      old_lvs = dev.children
7778
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
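      # (Added note) iv_names maps the DRBD disk's iv_name to a tuple of
      # (drbd_dev, old_lvs, new_lvs); the detach/rename/attach steps and the
      # final old-storage removal below all iterate over this mapping.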
7779

    
7780
      # we pass force_create=True to force the LVM creation
7781
      for new_lv in new_lvs:
7782
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7783
                        _GetInstanceInfoText(self.instance), False)
7784

    
7785
    return iv_names
7786

    
7787
  def _CheckDevices(self, node_name, iv_names):
7788
    for name, (dev, _, _) in iv_names.iteritems():
7789
      self.cfg.SetDiskID(dev, node_name)
7790

    
7791
      result = self.rpc.call_blockdev_find(node_name, dev)
7792

    
7793
      msg = result.fail_msg
7794
      if msg or not result.payload:
7795
        if not msg:
7796
          msg = "disk not found"
7797
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7798
                                 (name, msg))
7799

    
7800
      if result.payload.is_degraded:
7801
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7802

    
7803
  def _RemoveOldStorage(self, node_name, iv_names):
7804
    for name, (_, old_lvs, _) in iv_names.iteritems():
7805
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7806

    
7807
      for lv in old_lvs:
7808
        self.cfg.SetDiskID(lv, node_name)
7809

    
7810
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7811
        if msg:
7812
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7813
                             hint="remove unused LVs manually")
7814

    
7815
  def _ReleaseNodeLock(self, node_name):
7816
    """Releases the lock for a given node."""
7817
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7818

    
7819
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7820
    """Replace a disk on the primary or secondary for DRBD 8.
7821

7822
    The algorithm for replace is quite complicated:
7823

7824
      1. for each disk to be replaced:
7825

7826
        1. create new LVs on the target node with unique names
7827
        1. detach old LVs from the drbd device
7828
        1. rename old LVs to name_replaced.<time_t>
7829
        1. rename new LVs to old LVs
7830
        1. attach the new LVs (with the old names now) to the drbd device
7831

7832
      1. wait for sync across all devices
7833

7834
      1. for each modified disk:
7835

7836
        1. remove old LVs (which have the name name_replaced.<time_t>)
7837

7838
    Failures are not very well handled.
7839

7840
    """
7841
    steps_total = 6
7842

    
7843
    # Step: check device activation
7844
    self.lu.LogStep(1, steps_total, "Check device existence")
7845
    self._CheckDisksExistence([self.other_node, self.target_node])
7846
    self._CheckVolumeGroup([self.target_node, self.other_node])
7847

    
7848
    # Step: check other node consistency
7849
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7850
    self._CheckDisksConsistency(self.other_node,
7851
                                self.other_node == self.instance.primary_node,
7852
                                False)
7853

    
7854
    # Step: create new storage
7855
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7856
    iv_names = self._CreateNewStorage(self.target_node)
7857

    
7858
    # Step: for each lv, detach+rename*2+attach
7859
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7860
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7861
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7862

    
7863
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7864
                                                     old_lvs)
7865
      result.Raise("Can't detach drbd from local storage on node"
7866
                   " %s for device %s" % (self.target_node, dev.iv_name))
7867
      #dev.children = []
7868
      #cfg.Update(instance)
7869

    
7870
      # ok, we created the new LVs, so now we know we have the needed
7871
      # storage; as such, we proceed on the target node to rename
7872
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7873
      # using the assumption that logical_id == physical_id (which in
7874
      # turn is the unique_id on that node)
7875

    
7876
      # FIXME(iustin): use a better name for the replaced LVs
7877
      temp_suffix = int(time.time())
7878
      ren_fn = lambda d, suff: (d.physical_id[0],
7879
                                d.physical_id[1] + "_replaced-%s" % suff)
7880

    
7881
      # Build the rename list based on what LVs exist on the node
7882
      rename_old_to_new = []
7883
      for to_ren in old_lvs:
7884
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7885
        if not result.fail_msg and result.payload:
7886
          # device exists
7887
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7888

    
7889
      self.lu.LogInfo("Renaming the old LVs on the target node")
7890
      result = self.rpc.call_blockdev_rename(self.target_node,
7891
                                             rename_old_to_new)
7892
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7893

    
7894
      # Now we rename the new LVs to the old LVs
7895
      self.lu.LogInfo("Renaming the new LVs on the target node")
7896
      rename_new_to_old = [(new, old.physical_id)
7897
                           for old, new in zip(old_lvs, new_lvs)]
7898
      result = self.rpc.call_blockdev_rename(self.target_node,
7899
                                             rename_new_to_old)
7900
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7901

    
7902
      for old, new in zip(old_lvs, new_lvs):
7903
        new.logical_id = old.logical_id
7904
        self.cfg.SetDiskID(new, self.target_node)
7905

    
7906
      for disk in old_lvs:
7907
        disk.logical_id = ren_fn(disk, temp_suffix)
7908
        self.cfg.SetDiskID(disk, self.target_node)
7909

    
7910
      # Now that the new lvs have the old name, we can add them to the device
7911
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7912
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7913
                                                  new_lvs)
7914
      msg = result.fail_msg
7915
      if msg:
7916
        for new_lv in new_lvs:
7917
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7918
                                               new_lv).fail_msg
7919
          if msg2:
7920
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7921
                               hint=("cleanup manually the unused logical"
7922
                                     "volumes"))
7923
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7924

    
7925
      dev.children = new_lvs
7926

    
7927
      self.cfg.Update(self.instance, feedback_fn)
7928

    
7929
    cstep = 5
7930
    if self.early_release:
7931
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7932
      cstep += 1
7933
      self._RemoveOldStorage(self.target_node, iv_names)
7934
      # WARNING: we release both node locks here, do not do other RPCs
7935
      # than WaitForSync to the primary node
7936
      self._ReleaseNodeLock([self.target_node, self.other_node])
7937

    
7938
    # Wait for sync
7939
    # This can fail as the old devices are degraded and _WaitForSync
7940
    # does a combined result over all disks, so we don't check its return value
7941
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7942
    cstep += 1
7943
    _WaitForSync(self.lu, self.instance)
7944

    
7945
    # Check all devices manually
7946
    self._CheckDevices(self.instance.primary_node, iv_names)
7947

    
7948
    # Step: remove old storage
7949
    if not self.early_release:
7950
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7951
      cstep += 1
7952
      self._RemoveOldStorage(self.target_node, iv_names)
7953

    
7954
  def _ExecDrbd8Secondary(self, feedback_fn):
7955
    """Replace the secondary node for DRBD 8.
7956

7957
    The algorithm for replace is quite complicated:
7958
      - for all disks of the instance:
7959
        - create new LVs on the new node with same names
7960
        - shutdown the drbd device on the old secondary
7961
        - disconnect the drbd network on the primary
7962
        - create the drbd device on the new secondary
7963
        - network attach the drbd on the primary, using an artifice:
7964
          the drbd code for Attach() will connect to the network if it
7965
          finds a device which is connected to the good local disks but
7966
          not network enabled
7967
      - wait for sync across all devices
7968
      - remove all disks from the old secondary
7969

7970
    Failures are not very well handled.
7971

7972
    """
7973
    steps_total = 6
7974

    
7975
    # Step: check device activation
7976
    self.lu.LogStep(1, steps_total, "Check device existence")
7977
    self._CheckDisksExistence([self.instance.primary_node])
7978
    self._CheckVolumeGroup([self.instance.primary_node])
7979

    
7980
    # Step: check other node consistency
7981
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7982
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7983

    
7984
    # Step: create new storage
7985
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7986
    for idx, dev in enumerate(self.instance.disks):
7987
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7988
                      (self.new_node, idx))
7989
      # we pass force_create=True to force LVM creation
7990
      for new_lv in dev.children:
7991
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7992
                        _GetInstanceInfoText(self.instance), False)
7993

    
7994
    # Step 4: drbd minors and drbd setup changes
7995
    # after this, we must manually remove the drbd minors on both the
7996
    # error and the success paths
7997
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7998
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7999
                                         for dev in self.instance.disks],
8000
                                        self.instance.name)
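    # (Added note) one minor is requested on new_node for every instance disk;
    # the list comprehension above only uses the disk list for its length.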
8001
    logging.debug("Allocated minors %r", minors)
8002

    
8003
    iv_names = {}
8004
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8005
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8006
                      (self.new_node, idx))
8007
      # create new devices on new_node; note that we create two IDs:
8008
      # one without port, so the drbd will be activated without
8009
      # networking information on the new node at this stage, and one
8010
      # with network, for the latter activation in step 4
8011
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8012
      if self.instance.primary_node == o_node1:
8013
        p_minor = o_minor1
8014
      else:
8015
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8016
        p_minor = o_minor2
8017

    
8018
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8019
                      p_minor, new_minor, o_secret)
8020
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8021
                    p_minor, new_minor, o_secret)
8022

    
8023
      iv_names[idx] = (dev, dev.children, new_net_id)
8024
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8025
                    new_net_id)
8026
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8027
                              logical_id=new_alone_id,
8028
                              children=dev.children,
8029
                              size=dev.size)
8030
      try:
8031
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8032
                              _GetInstanceInfoText(self.instance), False)
8033
      except errors.GenericError:
8034
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8035
        raise
8036

    
8037
    # We have new devices, shutdown the drbd on the old secondary
8038
    for idx, dev in enumerate(self.instance.disks):
8039
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8040
      self.cfg.SetDiskID(dev, self.target_node)
8041
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8042
      if msg:
8043
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8044
                           "node: %s" % (idx, msg),
8045
                           hint=("Please cleanup this device manually as"
8046
                                 " soon as possible"))
8047

    
8048
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8049
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8050
                                               self.node_secondary_ip,
8051
                                               self.instance.disks)\
8052
                                              [self.instance.primary_node]
8053

    
8054
    msg = result.fail_msg
8055
    if msg:
8056
      # detaches didn't succeed (unlikely)
8057
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8058
      raise errors.OpExecError("Can't detach the disks from the network on"
8059
                               " old node: %s" % (msg,))
8060

    
8061
    # if we managed to detach at least one, we update all the disks of
8062
    # the instance to point to the new secondary
8063
    self.lu.LogInfo("Updating instance configuration")
8064
    for dev, _, new_logical_id in iv_names.itervalues():
8065
      dev.logical_id = new_logical_id
8066
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8067

    
8068
    self.cfg.Update(self.instance, feedback_fn)
8069

    
8070
    # and now perform the drbd attach
8071
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8072
                    " (standalone => connected)")
8073
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8074
                                            self.new_node],
8075
                                           self.node_secondary_ip,
8076
                                           self.instance.disks,
8077
                                           self.instance.name,
8078
                                           False)
8079
    for to_node, to_result in result.items():
8080
      msg = to_result.fail_msg
8081
      if msg:
8082
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8083
                           to_node, msg,
8084
                           hint=("please do a gnt-instance info to see the"
8085
                                 " status of disks"))
8086
    cstep = 5
8087
    if self.early_release:
8088
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8089
      cstep += 1
8090
      self._RemoveOldStorage(self.target_node, iv_names)
8091
      # WARNING: we release all node locks here, do not do other RPCs
8092
      # than WaitForSync to the primary node
8093
      self._ReleaseNodeLock([self.instance.primary_node,
8094
                             self.target_node,
8095
                             self.new_node])
8096

    
8097
    # Wait for sync
8098
    # This can fail as the old devices are degraded and _WaitForSync
8099
    # does a combined result over all disks, so we don't check its return value
8100
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8101
    cstep += 1
8102
    _WaitForSync(self.lu, self.instance)
8103

    
8104
    # Check all devices manually
8105
    self._CheckDevices(self.instance.primary_node, iv_names)
8106

    
8107
    # Step: remove old storage
8108
    if not self.early_release:
8109
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8110
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
8114
  """Repairs the volume group on a node.
8115

8116
  """
8117
  _OP_PARAMS = [
8118
    _PNodeName,
8119
    ("storage_type", _NoDefault, _CheckStorageType),
8120
    ("name", _NoDefault, _TNonEmptyString),
8121
    ("ignore_consistency", False, _TBool),
8122
    ]
8123
  REQ_BGL = False
8124

    
8125
  def CheckArguments(self):
8126
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8127

    
8128
    storage_type = self.op.storage_type
8129

    
8130
    if (constants.SO_FIX_CONSISTENCY not in
8131
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8132
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8133
                                 " repaired" % storage_type,
8134
                                 errors.ECODE_INVAL)
8135

    
8136
  def ExpandNames(self):
8137
    self.needed_locks = {
8138
      locking.LEVEL_NODE: [self.op.node_name],
8139
      }
8140

    
8141
  def _CheckFaultyDisks(self, instance, node_name):
8142
    """Ensure faulty disks abort the opcode or at least warn."""
8143
    try:
8144
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8145
                                  node_name, True):
8146
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8147
                                   " node '%s'" % (instance.name, node_name),
8148
                                   errors.ECODE_STATE)
8149
    except errors.OpPrereqError, err:
8150
      if self.op.ignore_consistency:
8151
        self.proc.LogWarning(str(err.args[0]))
8152
      else:
8153
        raise
8154

    
8155
  def CheckPrereq(self):
8156
    """Check prerequisites.
8157

8158
    """
8159
    # Check whether any instance on this node has faulty disks
8160
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8161
      if not inst.admin_up:
8162
        continue
8163
      check_nodes = set(inst.all_nodes)
8164
      check_nodes.discard(self.op.node_name)
8165
      for inst_node_name in check_nodes:
8166
        self._CheckFaultyDisks(inst, inst_node_name)
8167

    
8168
  def Exec(self, feedback_fn):
8169
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8170
                (self.op.name, self.op.node_name))
8171

    
8172
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8173
    result = self.rpc.call_storage_execute(self.op.node_name,
8174
                                           self.op.storage_type, st_args,
8175
                                           self.op.name,
8176
                                           constants.SO_FIX_CONSISTENCY)
8177
    result.Raise("Failed to repair storage unit '%s' on %s" %
8178
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
8182
  """Computes the node evacuation strategy.
8183

8184
  """
8185
  _OP_PARAMS = [
8186
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8187
    ("remote_node", None, _TMaybeString),
8188
    ("iallocator", None, _TMaybeString),
8189
    ]
8190
  REQ_BGL = False
8191

    
8192
  def CheckArguments(self):
8193
    if self.op.remote_node is not None and self.op.iallocator is not None:
8194
      raise errors.OpPrereqError("Give either the iallocator or the new"
8195
                                 " secondary, not both", errors.ECODE_INVAL)
8196

    
8197
  def ExpandNames(self):
8198
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8199
    self.needed_locks = locks = {}
8200
    if self.op.remote_node is None:
8201
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8202
    else:
8203
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8204
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8205

    
8206
  def Exec(self, feedback_fn):
8207
    if self.op.remote_node is not None:
8208
      instances = []
8209
      for node in self.op.nodes:
8210
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8211
      result = []
8212
      for i in instances:
8213
        if i.primary_node == self.op.remote_node:
8214
          raise errors.OpPrereqError("Node %s is the primary node of"
8215
                                     " instance %s, cannot use it as"
8216
                                     " secondary" %
8217
                                     (self.op.remote_node, i.name),
8218
                                     errors.ECODE_INVAL)
8219
        result.append([i.name, self.op.remote_node])
8220
    else:
8221
      ial = IAllocator(self.cfg, self.rpc,
8222
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8223
                       evac_nodes=self.op.nodes)
8224
      ial.Run(self.op.iallocator, validate=True)
8225
      if not ial.success:
8226
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8227
                                 errors.ECODE_NORES)
8228
      result = ial.result
8229
    return result
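    # (Added note) each element of the result pairs an instance name with the
    # node(s) it should be evacuated to, either built here (remote_node case)
    # or taken from the iallocator's multi-evacuation result.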


class LUGrowDisk(LogicalUnit):
8233
  """Grow a disk of an instance.
8234

8235
  """
8236
  HPATH = "disk-grow"
8237
  HTYPE = constants.HTYPE_INSTANCE
8238
  _OP_PARAMS = [
8239
    _PInstanceName,
8240
    ("disk", _NoDefault, _TInt),
8241
    ("amount", _NoDefault, _TInt),
8242
    ("wait_for_sync", True, _TBool),
8243
    ]
8244
  REQ_BGL = False
8245

    
8246
  def ExpandNames(self):
8247
    self._ExpandAndLockInstance()
8248
    self.needed_locks[locking.LEVEL_NODE] = []
8249
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8250

    
8251
  def DeclareLocks(self, level):
8252
    if level == locking.LEVEL_NODE:
8253
      self._LockInstancesNodes()
8254

    
8255
  def BuildHooksEnv(self):
8256
    """Build hooks env.
8257

8258
    This runs on the master, the primary and all the secondaries.
8259

8260
    """
8261
    env = {
8262
      "DISK": self.op.disk,
8263
      "AMOUNT": self.op.amount,
8264
      }
8265
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8266
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8267
    return env, nl, nl
8268

    
8269
  def CheckPrereq(self):
8270
    """Check prerequisites.
8271

8272
    This checks that the instance is in the cluster.
8273

8274
    """
8275
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8276
    assert instance is not None, \
8277
      "Cannot retrieve locked instance %s" % self.op.instance_name
8278
    nodenames = list(instance.all_nodes)
8279
    for node in nodenames:
8280
      _CheckNodeOnline(self, node)
8281

    
8282
    self.instance = instance
8283

    
8284
    if instance.disk_template not in constants.DTS_GROWABLE:
8285
      raise errors.OpPrereqError("Instance's disk layout does not support"
8286
                                 " growing.", errors.ECODE_INVAL)
8287

    
8288
    self.disk = instance.FindDisk(self.op.disk)
8289

    
8290
    if instance.disk_template != constants.DT_FILE:
8291
      # TODO: check the free disk space for file, when that feature will be
8292
      # supported
8293
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8294

    
8295
  def Exec(self, feedback_fn):
8296
    """Execute disk grow.
8297

8298
    """
8299
    instance = self.instance
8300
    disk = self.disk
8301

    
8302
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8303
    if not disks_ok:
8304
      raise errors.OpExecError("Cannot activate block device to grow")
8305

    
8306
    for node in instance.all_nodes:
8307
      self.cfg.SetDiskID(disk, node)
8308
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8309
      result.Raise("Grow request failed to node %s" % node)
8310

    
8311
      # TODO: Rewrite code to work properly
8312
      # DRBD goes into sync mode for a short amount of time after executing the
8313
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8314
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8315
      # time is a work-around.
8316
      time.sleep(5)
8317

    
8318
    disk.RecordGrow(self.op.amount)
8319
    self.cfg.Update(instance, feedback_fn)
8320
    if self.op.wait_for_sync:
8321
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8322
      if disk_abort:
8323
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8324
                             " status.\nPlease check the instance.")
8325
      if not instance.admin_up:
8326
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8327
    elif not instance.admin_up:
8328
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
8334
  """Query runtime instance data.
8335

8336
  """
8337
  _OP_PARAMS = [
8338
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8339
    ("static", False, _TBool),
8340
    ]
8341
  REQ_BGL = False
8342

    
8343
  def ExpandNames(self):
8344
    self.needed_locks = {}
8345
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8346

    
8347
    if self.op.instances:
8348
      self.wanted_names = []
8349
      for name in self.op.instances:
8350
        full_name = _ExpandInstanceName(self.cfg, name)
8351
        self.wanted_names.append(full_name)
8352
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8353
    else:
8354
      self.wanted_names = None
8355
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8356

    
8357
    self.needed_locks[locking.LEVEL_NODE] = []
8358
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8359

    
8360
  def DeclareLocks(self, level):
8361
    if level == locking.LEVEL_NODE:
8362
      self._LockInstancesNodes()
8363

    
8364
  def CheckPrereq(self):
8365
    """Check prerequisites.
8366

8367
    This only checks the optional instance list against the existing names.
8368

8369
    """
8370
    if self.wanted_names is None:
8371
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8372

    
8373
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8374
                             in self.wanted_names]
8375

    
8376
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8377
    """Returns the status of a block device
8378

8379
    """
8380
    if self.op.static or not node:
8381
      return None
8382

    
8383
    self.cfg.SetDiskID(dev, node)
8384

    
8385
    result = self.rpc.call_blockdev_find(node, dev)
8386
    if result.offline:
8387
      return None
8388

    
8389
    result.Raise("Can't compute disk status for %s" % instance_name)
8390

    
8391
    status = result.payload
8392
    if status is None:
8393
      return None
8394

    
8395
    return (status.dev_path, status.major, status.minor,
8396
            status.sync_percent, status.estimated_time,
8397
            status.is_degraded, status.ldisk_status)
8398

    
8399
  def _ComputeDiskStatus(self, instance, snode, dev):
8400
    """Compute block device status.
8401

8402
    """
8403
    if dev.dev_type in constants.LDS_DRBD:
8404
      # we override the snode here (otherwise we use the one passed in)
8405
      if dev.logical_id[0] == instance.primary_node:
8406
        snode = dev.logical_id[1]
8407
      else:
8408
        snode = dev.logical_id[0]
8409

    
8410
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8411
                                              instance.name, dev)
8412
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8413

    
8414
    if dev.children:
8415
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8416
                      for child in dev.children]
8417
    else:
8418
      dev_children = []
8419

    
8420
    data = {
8421
      "iv_name": dev.iv_name,
8422
      "dev_type": dev.dev_type,
8423
      "logical_id": dev.logical_id,
8424
      "physical_id": dev.physical_id,
8425
      "pstatus": dev_pstatus,
8426
      "sstatus": dev_sstatus,
8427
      "children": dev_children,
8428
      "mode": dev.mode,
8429
      "size": dev.size,
8430
      }
8431

    
8432
    return data
8433

    
8434
  def Exec(self, feedback_fn):
8435
    """Gather and return data"""
8436
    result = {}
8437

    
8438
    cluster = self.cfg.GetClusterInfo()
8439

    
8440
    for instance in self.wanted_instances:
8441
      if not self.op.static:
8442
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8443
                                                  instance.name,
8444
                                                  instance.hypervisor)
8445
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8446
        remote_info = remote_info.payload
8447
        if remote_info and "state" in remote_info:
8448
          remote_state = "up"
8449
        else:
8450
          remote_state = "down"
8451
      else:
8452
        remote_state = None
8453
      if instance.admin_up:
8454
        config_state = "up"
8455
      else:
8456
        config_state = "down"
8457

    
8458
      disks = [self._ComputeDiskStatus(instance, None, device)
8459
               for device in instance.disks]
8460

    
8461
      idict = {
8462
        "name": instance.name,
8463
        "config_state": config_state,
8464
        "run_state": remote_state,
8465
        "pnode": instance.primary_node,
8466
        "snodes": instance.secondary_nodes,
8467
        "os": instance.os,
8468
        # this happens to be the same format used for hooks
8469
        "nics": _NICListToTuple(self, instance.nics),
8470
        "disk_template": instance.disk_template,
8471
        "disks": disks,
8472
        "hypervisor": instance.hypervisor,
8473
        "network_port": instance.network_port,
8474
        "hv_instance": instance.hvparams,
8475
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8476
        "be_instance": instance.beparams,
8477
        "be_actual": cluster.FillBE(instance),
8478
        "os_instance": instance.osparams,
8479
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8480
        "serial_no": instance.serial_no,
8481
        "mtime": instance.mtime,
8482
        "ctime": instance.ctime,
8483
        "uuid": instance.uuid,
8484
        }
8485

    
8486
      result[instance.name] = idict
8487

    
8488
    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
8493

8494
  """
8495
  HPATH = "instance-modify"
8496
  HTYPE = constants.HTYPE_INSTANCE
8497
  _OP_PARAMS = [
8498
    _PInstanceName,
8499
    ("nics", _EmptyList, _TList),
8500
    ("disks", _EmptyList, _TList),
8501
    ("beparams", _EmptyDict, _TDict),
8502
    ("hvparams", _EmptyDict, _TDict),
8503
    ("disk_template", None, _TMaybeString),
8504
    ("remote_node", None, _TMaybeString),
8505
    ("os_name", None, _TMaybeString),
8506
    ("force_variant", False, _TBool),
8507
    ("osparams", None, _TOr(_TDict, _TNone)),
8508
    _PForce,
8509
    ]
8510
  REQ_BGL = False
8511

    
8512
  def CheckArguments(self):
8513
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.osparams):
8515
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8516

    
8517
    if self.op.hvparams:
8518
      _CheckGlobalHvParams(self.op.hvparams)
8519

    
8520
    # Disk validation
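    # self.op.disks is a list of (operation, settings) pairs; hypothetical
    # examples: [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})] adds a
    # disk, [(0, {"mode": "ro"})] changes the mode of disk 0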
8521
    disk_addremove = 0
8522
    for disk_op, disk_dict in self.op.disks:
8523
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8524
      if disk_op == constants.DDM_REMOVE:
8525
        disk_addremove += 1
8526
        continue
8527
      elif disk_op == constants.DDM_ADD:
8528
        disk_addremove += 1
8529
      else:
8530
        if not isinstance(disk_op, int):
8531
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8532
        if not isinstance(disk_dict, dict):
8533
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8534
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8535

    
8536
      if disk_op == constants.DDM_ADD:
8537
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8538
        if mode not in constants.DISK_ACCESS_SET:
8539
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8540
                                     errors.ECODE_INVAL)
8541
        size = disk_dict.get('size', None)
8542
        if size is None:
8543
          raise errors.OpPrereqError("Required disk parameter size missing",
8544
                                     errors.ECODE_INVAL)
8545
        try:
8546
          size = int(size)
8547
        except (TypeError, ValueError), err:
8548
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8549
                                     str(err), errors.ECODE_INVAL)
8550
        disk_dict['size'] = size
8551
      else:
8552
        # modification of disk
8553
        if 'size' in disk_dict:
8554
          raise errors.OpPrereqError("Disk size change not possible, use"
8555
                                     " grow-disk", errors.ECODE_INVAL)
8556

    
8557
    if disk_addremove > 1:
8558
      raise errors.OpPrereqError("Only one disk add or remove operation"
8559
                                 " supported at a time", errors.ECODE_INVAL)
8560

    
8561
    if self.op.disks and self.op.disk_template is not None:
8562
      raise errors.OpPrereqError("Disk template conversion and other disk"
8563
                                 " changes not supported at the same time",
8564
                                 errors.ECODE_INVAL)
8565

    
8566
    if self.op.disk_template:
8567
      _CheckDiskTemplate(self.op.disk_template)
8568
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8569
          self.op.remote_node is None):
8570
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8571
                                   " one requires specifying a secondary node",
8572
                                   errors.ECODE_INVAL)
8573

    
8574
    # NIC validation
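    # self.op.nics uses the same (operation, settings) format; hypothetical
    # examples: [(constants.DDM_ADD, {"mac": "auto", "link": "br0"})] adds a
    # NIC, [(1, {"ip": "none"})] clears the IP of NIC 1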
8575
    nic_addremove = 0
8576
    for nic_op, nic_dict in self.op.nics:
8577
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8578
      if nic_op == constants.DDM_REMOVE:
8579
        nic_addremove += 1
8580
        continue
8581
      elif nic_op == constants.DDM_ADD:
8582
        nic_addremove += 1
8583
      else:
8584
        if not isinstance(nic_op, int):
8585
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8586
        if not isinstance(nic_dict, dict):
8587
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8588
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8589

    
8590
      # nic_dict should be a dict
8591
      nic_ip = nic_dict.get('ip', None)
8592
      if nic_ip is not None:
8593
        if nic_ip.lower() == constants.VALUE_NONE:
8594
          nic_dict['ip'] = None
8595
        else:
8596
          if not utils.IsValidIP4(nic_ip):
8597
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8598
                                       errors.ECODE_INVAL)
8599

    
8600
      nic_bridge = nic_dict.get('bridge', None)
8601
      nic_link = nic_dict.get('link', None)
8602
      if nic_bridge and nic_link:
8603
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8604
                                   " at the same time", errors.ECODE_INVAL)
8605
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8606
        nic_dict['bridge'] = None
8607
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8608
        nic_dict['link'] = None
8609

    
8610
      if nic_op == constants.DDM_ADD:
8611
        nic_mac = nic_dict.get('mac', None)
8612
        if nic_mac is None:
8613
          nic_dict['mac'] = constants.VALUE_AUTO
8614

    
8615
      if 'mac' in nic_dict:
8616
        nic_mac = nic_dict['mac']
8617
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8618
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8619

    
8620
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8621
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8622
                                     " modifying an existing nic",
8623
                                     errors.ECODE_INVAL)
8624

    
8625
    if nic_addremove > 1:
8626
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8627
                                 " supported at a time", errors.ECODE_INVAL)
8628

    
8629
  def ExpandNames(self):
8630
    self._ExpandAndLockInstance()
8631
    self.needed_locks[locking.LEVEL_NODE] = []
8632
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8633

    
8634
  def DeclareLocks(self, level):
8635
    if level == locking.LEVEL_NODE:
8636
      self._LockInstancesNodes()
8637
      if self.op.disk_template and self.op.remote_node:
8638
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8639
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8640

    
8641
  def BuildHooksEnv(self):
8642
    """Build hooks env.
8643

8644
    This runs on the master, primary and secondaries.
8645

8646
    """
8647
    args = dict()
8648
    if constants.BE_MEMORY in self.be_new:
8649
      args['memory'] = self.be_new[constants.BE_MEMORY]
8650
    if constants.BE_VCPUS in self.be_new:
8651
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8652
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8653
    # information at all.
8654
    if self.op.nics:
8655
      args['nics'] = []
8656
      nic_override = dict(self.op.nics)
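      # maps a NIC index (or DDM_ADD/DDM_REMOVE) to the requested changes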
8657
      for idx, nic in enumerate(self.instance.nics):
8658
        if idx in nic_override:
8659
          this_nic_override = nic_override[idx]
8660
        else:
8661
          this_nic_override = {}
8662
        if 'ip' in this_nic_override:
8663
          ip = this_nic_override['ip']
8664
        else:
8665
          ip = nic.ip
8666
        if 'mac' in this_nic_override:
8667
          mac = this_nic_override['mac']
8668
        else:
8669
          mac = nic.mac
8670
        if idx in self.nic_pnew:
8671
          nicparams = self.nic_pnew[idx]
8672
        else:
8673
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8674
        mode = nicparams[constants.NIC_MODE]
8675
        link = nicparams[constants.NIC_LINK]
8676
        args['nics'].append((ip, mac, mode, link))
8677
      if constants.DDM_ADD in nic_override:
8678
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8679
        mac = nic_override[constants.DDM_ADD]['mac']
8680
        nicparams = self.nic_pnew[constants.DDM_ADD]
8681
        mode = nicparams[constants.NIC_MODE]
8682
        link = nicparams[constants.NIC_LINK]
8683
        args['nics'].append((ip, mac, mode, link))
8684
      elif constants.DDM_REMOVE in nic_override:
8685
        del args['nics'][-1]
8686

    
8687
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8688
    if self.op.disk_template:
8689
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8690
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8691
    return env, nl, nl
8692

    
8693
  def CheckPrereq(self):
8694
    """Check prerequisites.
8695

8696
    This only checks the instance list against the existing names.
8697

8698
    """
8699
    # checking the new params on the primary/secondary nodes
8700

    
8701
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8702
    cluster = self.cluster = self.cfg.GetClusterInfo()
8703
    assert self.instance is not None, \
8704
      "Cannot retrieve locked instance %s" % self.op.instance_name
8705
    pnode = instance.primary_node
8706
    nodelist = list(instance.all_nodes)
8707

    
8708
    # OS change
8709
    if self.op.os_name and not self.op.force:
8710
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8711
                      self.op.force_variant)
8712
      instance_os = self.op.os_name
8713
    else:
8714
      instance_os = instance.os
8715

    
8716
    if self.op.disk_template:
8717
      if instance.disk_template == self.op.disk_template:
8718
        raise errors.OpPrereqError("Instance already has disk template %s" %
8719
                                   instance.disk_template, errors.ECODE_INVAL)
8720

    
8721
      if (instance.disk_template,
8722
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8723
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8724
                                   " %s to %s" % (instance.disk_template,
8725
                                                  self.op.disk_template),
8726
                                   errors.ECODE_INVAL)
8727
      _CheckInstanceDown(self, instance, "cannot change disk template")
8728
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8729
        _CheckNodeOnline(self, self.op.remote_node)
8730
        _CheckNodeNotDrained(self, self.op.remote_node)
8731
        disks = [{"size": d.size} for d in instance.disks]
8732
        required = _ComputeDiskSize(self.op.disk_template, disks)
8733
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8734

    
8735
    # hvparams processing
8736
    if self.op.hvparams:
8737
      hv_type = instance.hypervisor
8738
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8739
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8740
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8741

    
8742
      # local check
8743
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8744
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8745
      self.hv_new = hv_new # the new actual values
8746
      self.hv_inst = i_hvdict # the new dict (without defaults)
8747
    else:
8748
      self.hv_new = self.hv_inst = {}
8749

    
8750
    # beparams processing
8751
    if self.op.beparams:
8752
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8753
                                   use_none=True)
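      # use_none=True lets a None value drop the key, so the cluster-level
      # default applies again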
8754
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8755
      be_new = cluster.SimpleFillBE(i_bedict)
8756
      self.be_new = be_new # the new actual values
8757
      self.be_inst = i_bedict # the new dict (without defaults)
8758
    else:
8759
      self.be_new = self.be_inst = {}
8760

    
8761
    # osparams processing
8762
    if self.op.osparams:
8763
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8764
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8765
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8766
      self.os_inst = i_osdict # the new dict (without defaults)
8767
    else:
8768
      self.os_new = self.os_inst = {}
8769

    
8770
    self.warn = []
8771

    
8772
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8773
      mem_check_list = [pnode]
8774
      if be_new[constants.BE_AUTO_BALANCE]:
8775
        # either we changed auto_balance to yes or it was from before
8776
        mem_check_list.extend(instance.secondary_nodes)
8777
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8778
                                                  instance.hypervisor)
8779
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8780
                                         instance.hypervisor)
8781
      pninfo = nodeinfo[pnode]
8782
      msg = pninfo.fail_msg
8783
      if msg:
8784
        # Assume the primary node is unreachable and go ahead
8785
        self.warn.append("Can't get info from primary node %s: %s" %
8786
                         (pnode, msg))
8787
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8788
        self.warn.append("Node data from primary node %s doesn't contain"
8789
                         " free memory information" % pnode)
8790
      elif instance_info.fail_msg:
8791
        self.warn.append("Can't get instance runtime information: %s" %
8792
                        instance_info.fail_msg)
8793
      else:
8794
        if instance_info.payload:
8795
          current_mem = int(instance_info.payload['memory'])
8796
        else:
8797
          # Assume instance not running
8798
          # (there is a slight race condition here, but it's not very probable,
8799
          # and we have no other way to check)
8800
          current_mem = 0
8801
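        # memory needed beyond what the node currently has free, after taking
        # into account what the (possibly running) instance already uses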
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8802
                    pninfo.payload['memory_free'])
8803
        if miss_mem > 0:
8804
          raise errors.OpPrereqError("This change will prevent the instance"
8805
                                     " from starting, due to %d MB of memory"
8806
                                     " missing on its primary node" % miss_mem,
8807
                                     errors.ECODE_NORES)
8808

    
8809
      if be_new[constants.BE_AUTO_BALANCE]:
8810
        for node, nres in nodeinfo.items():
8811
          if node not in instance.secondary_nodes:
8812
            continue
8813
          msg = nres.fail_msg
8814
          if msg:
8815
            self.warn.append("Can't get info from secondary node %s: %s" %
8816
                             (node, msg))
8817
          elif not isinstance(nres.payload.get('memory_free', None), int):
8818
            self.warn.append("Secondary node %s didn't return free"
8819
                             " memory information" % node)
8820
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8821
            self.warn.append("Not enough memory to failover instance to"
8822
                             " secondary node %s" % node)
8823

    
8824
    # NIC processing
8825
    self.nic_pnew = {}
8826
    self.nic_pinst = {}
8827
    for nic_op, nic_dict in self.op.nics:
8828
      if nic_op == constants.DDM_REMOVE:
8829
        if not instance.nics:
8830
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8831
                                     errors.ECODE_INVAL)
8832
        continue
8833
      if nic_op != constants.DDM_ADD:
8834
        # an existing nic
8835
        if not instance.nics:
8836
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8837
                                     " no NICs" % nic_op,
8838
                                     errors.ECODE_INVAL)
8839
        if nic_op < 0 or nic_op >= len(instance.nics):
8840
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8841
                                     " are 0 to %d" %
8842
                                     (nic_op, len(instance.nics) - 1),
8843
                                     errors.ECODE_INVAL)
8844
        old_nic_params = instance.nics[nic_op].nicparams
8845
        old_nic_ip = instance.nics[nic_op].ip
8846
      else:
8847
        old_nic_params = {}
8848
        old_nic_ip = None
8849

    
8850
      update_params_dict = dict([(key, nic_dict[key])
8851
                                 for key in constants.NICS_PARAMETERS
8852
                                 if key in nic_dict])
8853

    
8854
      if 'bridge' in nic_dict:
8855
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8856

    
8857
      new_nic_params = _GetUpdatedParams(old_nic_params,
8858
                                         update_params_dict)
8859
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8860
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8861
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8862
      self.nic_pinst[nic_op] = new_nic_params
8863
      self.nic_pnew[nic_op] = new_filled_nic_params
8864
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8865

    
8866
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8867
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8868
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8869
        if msg:
8870
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8871
          if self.op.force:
8872
            self.warn.append(msg)
8873
          else:
8874
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8875
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8876
        if 'ip' in nic_dict:
8877
          nic_ip = nic_dict['ip']
8878
        else:
8879
          nic_ip = old_nic_ip
8880
        if nic_ip is None:
8881
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8882
                                     ' on a routed nic', errors.ECODE_INVAL)
8883
      if 'mac' in nic_dict:
8884
        nic_mac = nic_dict['mac']
8885
        if nic_mac is None:
8886
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8887
                                     errors.ECODE_INVAL)
8888
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8889
          # otherwise generate the mac
8890
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8891
        else:
8892
          # or validate/reserve the current one
8893
          try:
8894
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8895
          except errors.ReservationError:
8896
            raise errors.OpPrereqError("MAC address %s already in use"
8897
                                       " in cluster" % nic_mac,
8898
                                       errors.ECODE_NOTUNIQUE)
8899

    
8900
    # DISK processing
8901
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8902
      raise errors.OpPrereqError("Disk operations not supported for"
8903
                                 " diskless instances",
8904
                                 errors.ECODE_INVAL)
8905
    for disk_op, _ in self.op.disks:
8906
      if disk_op == constants.DDM_REMOVE:
8907
        if len(instance.disks) == 1:
8908
          raise errors.OpPrereqError("Cannot remove the last disk of"
8909
                                     " an instance", errors.ECODE_INVAL)
8910
        _CheckInstanceDown(self, instance, "cannot remove disks")
8911

    
8912
      if (disk_op == constants.DDM_ADD and
8913
          len(instance.disks) >= constants.MAX_DISKS):
8914
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8915
                                   " add more" % constants.MAX_DISKS,
8916
                                   errors.ECODE_STATE)
8917
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8918
        # an existing disk
8919
        if disk_op < 0 or disk_op >= len(instance.disks):
8920
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8921
                                     " are 0 to %d" %
8922
                                     (disk_op, len(instance.disks) - 1),
8923
                                     errors.ECODE_INVAL)
8924

    
8925
    return
8926

    
8927
  def _ConvertPlainToDrbd(self, feedback_fn):
8928
    """Converts an instance from plain to drbd.
8929

8930
    """
8931
    feedback_fn("Converting template to drbd")
8932
    instance = self.instance
8933
    pnode = instance.primary_node
8934
    snode = self.op.remote_node
8935

    
8936
    # create a fake disk info for _GenerateDiskTemplate
8937
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8938
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8939
                                      instance.name, pnode, [snode],
8940
                                      disk_info, None, None, 0)
8941
    info = _GetInstanceInfoText(instance)
8942
    feedback_fn("Creating additional volumes...")
8943
    # first, create the missing data and meta devices
8944
    for disk in new_disks:
8945
      # unfortunately this is... not too nice
8946
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8947
                            info, True)
8948
      for child in disk.children:
8949
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8950
    # at this stage, all new LVs have been created, we can rename the
8951
    # old ones
8952
    feedback_fn("Renaming original volumes...")
8953
    rename_list = [(o, n.children[0].logical_id)
8954
                   for (o, n) in zip(instance.disks, new_disks)]
8955
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8956
    result.Raise("Failed to rename original LVs")
8957

    
8958
    feedback_fn("Initializing DRBD devices...")
8959
    # all child devices are in place, we can now create the DRBD devices
8960
    for disk in new_disks:
8961
      for node in [pnode, snode]:
8962
        f_create = node == pnode
8963
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8964

    
8965
    # at this point, the instance has been modified
8966
    instance.disk_template = constants.DT_DRBD8
8967
    instance.disks = new_disks
8968
    self.cfg.Update(instance, feedback_fn)
8969

    
8970
    # disks are created, waiting for sync
8971
    disk_abort = not _WaitForSync(self, instance)
8972
    if disk_abort:
8973
      raise errors.OpExecError("There are some degraded disks for"
8974
                               " this instance, please cleanup manually")
8975

    
8976
  def _ConvertDrbdToPlain(self, feedback_fn):
8977
    """Converts an instance from drbd to plain.
8978

8979
    """
8980
    instance = self.instance
8981
    assert len(instance.secondary_nodes) == 1
8982
    pnode = instance.primary_node
8983
    snode = instance.secondary_nodes[0]
8984
    feedback_fn("Converting template to plain")
8985

    
8986
    old_disks = instance.disks
8987
    new_disks = [d.children[0] for d in old_disks]
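    # for DRBD8 each disk has two children: the data LV (child 0) and the
    # metadata LV (child 1); we keep only the data volume here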
8988

    
8989
    # copy over size and mode
8990
    for parent, child in zip(old_disks, new_disks):
8991
      child.size = parent.size
8992
      child.mode = parent.mode
8993

    
8994
    # update instance structure
8995
    instance.disks = new_disks
8996
    instance.disk_template = constants.DT_PLAIN
8997
    self.cfg.Update(instance, feedback_fn)
8998

    
8999
    feedback_fn("Removing volumes on the secondary node...")
9000
    for disk in old_disks:
9001
      self.cfg.SetDiskID(disk, snode)
9002
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9003
      if msg:
9004
        self.LogWarning("Could not remove block device %s on node %s,"
9005
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9006

    
9007
    feedback_fn("Removing unneeded volumes on the primary node...")
9008
    for idx, disk in enumerate(old_disks):
9009
      meta = disk.children[1]
9010
      self.cfg.SetDiskID(meta, pnode)
9011
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9012
      if msg:
9013
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9014
                        " continuing anyway: %s", idx, pnode, msg)
9015

    
9016

    
9017
  def Exec(self, feedback_fn):
9018
    """Modifies an instance.
9019

9020
    All parameters take effect only at the next restart of the instance.
9021

9022
    """
9023
    # Process here the warnings from CheckPrereq, as we don't have a
9024
    # feedback_fn there.
9025
    for warn in self.warn:
9026
      feedback_fn("WARNING: %s" % warn)
9027

    
9028
    result = []
9029
    instance = self.instance
9030
    # disk changes
9031
    for disk_op, disk_dict in self.op.disks:
9032
      if disk_op == constants.DDM_REMOVE:
9033
        # remove the last disk
9034
        device = instance.disks.pop()
9035
        device_idx = len(instance.disks)
9036
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9037
          self.cfg.SetDiskID(disk, node)
9038
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9039
          if msg:
9040
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9041
                            " continuing anyway", device_idx, node, msg)
9042
        result.append(("disk/%d" % device_idx, "remove"))
9043
      elif disk_op == constants.DDM_ADD:
9044
        # add a new disk
9045
        if instance.disk_template == constants.DT_FILE:
9046
          file_driver, file_path = instance.disks[0].logical_id
9047
          file_path = os.path.dirname(file_path)
9048
        else:
9049
          file_driver = file_path = None
9050
        disk_idx_base = len(instance.disks)
9051
        new_disk = _GenerateDiskTemplate(self,
9052
                                         instance.disk_template,
9053
                                         instance.name, instance.primary_node,
9054
                                         instance.secondary_nodes,
9055
                                         [disk_dict],
9056
                                         file_path,
9057
                                         file_driver,
9058
                                         disk_idx_base)[0]
9059
        instance.disks.append(new_disk)
9060
        info = _GetInstanceInfoText(instance)
9061

    
9062
        logging.info("Creating volume %s for instance %s",
9063
                     new_disk.iv_name, instance.name)
9064
        # Note: this needs to be kept in sync with _CreateDisks
9065
        #HARDCODE
9066
        for node in instance.all_nodes:
9067
          f_create = node == instance.primary_node
9068
          try:
9069
            _CreateBlockDev(self, node, instance, new_disk,
9070
                            f_create, info, f_create)
9071
          except errors.OpExecError, err:
9072
            self.LogWarning("Failed to create volume %s (%s) on"
9073
                            " node %s: %s",
9074
                            new_disk.iv_name, new_disk, node, err)
9075
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9076
                       (new_disk.size, new_disk.mode)))
9077
      else:
9078
        # change a given disk
9079
        instance.disks[disk_op].mode = disk_dict['mode']
9080
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9081

    
9082
    if self.op.disk_template:
9083
      r_shut = _ShutdownInstanceDisks(self, instance)
9084
      if not r_shut:
9085
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9086
                                 " proceed with disk template conversion")
9087
      mode = (instance.disk_template, self.op.disk_template)
9088
      try:
9089
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9090
      except:
9091
        self.cfg.ReleaseDRBDMinors(instance.name)
9092
        raise
9093
      result.append(("disk_template", self.op.disk_template))
9094

    
9095
    # NIC changes
9096
    for nic_op, nic_dict in self.op.nics:
9097
      if nic_op == constants.DDM_REMOVE:
9098
        # remove the last nic
9099
        del instance.nics[-1]
9100
        result.append(("nic.%d" % len(instance.nics), "remove"))
9101
      elif nic_op == constants.DDM_ADD:
9102
        # mac and bridge should be set, by now
9103
        mac = nic_dict['mac']
9104
        ip = nic_dict.get('ip', None)
9105
        nicparams = self.nic_pinst[constants.DDM_ADD]
9106
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9107
        instance.nics.append(new_nic)
9108
        result.append(("nic.%d" % (len(instance.nics) - 1),
9109
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9110
                       (new_nic.mac, new_nic.ip,
9111
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9112
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9113
                       )))
9114
      else:
9115
        for key in 'mac', 'ip':
9116
          if key in nic_dict:
9117
            setattr(instance.nics[nic_op], key, nic_dict[key])
9118
        if nic_op in self.nic_pinst:
9119
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9120
        for key, val in nic_dict.iteritems():
9121
          result.append(("nic.%s/%d" % (key, nic_op), val))
9122

    
9123
    # hvparams changes
9124
    if self.op.hvparams:
9125
      instance.hvparams = self.hv_inst
9126
      for key, val in self.op.hvparams.iteritems():
9127
        result.append(("hv/%s" % key, val))
9128

    
9129
    # beparams changes
9130
    if self.op.beparams:
9131
      instance.beparams = self.be_inst
9132
      for key, val in self.op.beparams.iteritems():
9133
        result.append(("be/%s" % key, val))
9134

    
9135
    # OS change
9136
    if self.op.os_name:
9137
      instance.os = self.op.os_name
9138

    
9139
    # osparams changes
9140
    if self.op.osparams:
9141
      instance.osparams = self.os_inst
9142
      for key, val in self.op.osparams.iteritems():
9143
        result.append(("os/%s" % key, val))
9144

    
9145
    self.cfg.Update(instance, feedback_fn)
9146

    
9147
    return result
9148

    
9149
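  # maps (old disk template, new disk template) to the conversion helper; the
  # values are plain functions, hence the explicit 'self' argument in Exec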
  _DISK_CONVERSIONS = {
9150
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9151
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9152
    }


class LUQueryExports(NoHooksLU):
9156
  """Query the exports list
9157

9158
  """
9159
  _OP_PARAMS = [
9160
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9161
    ("use_locking", False, _TBool),
9162
    ]
9163
  REQ_BGL = False
9164

    
9165
  def ExpandNames(self):
9166
    self.needed_locks = {}
9167
    self.share_locks[locking.LEVEL_NODE] = 1
9168
    if not self.op.nodes:
9169
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9170
    else:
9171
      self.needed_locks[locking.LEVEL_NODE] = \
9172
        _GetWantedNodes(self, self.op.nodes)
9173

    
9174
  def Exec(self, feedback_fn):
9175
    """Compute the list of all the exported system images.
9176

9177
    @rtype: dict
9178
    @return: a dictionary with the structure node->(export-list)
9179
        where export-list is a list of the instances exported on
9180
        that node.
9181

9182
    """
9183
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9184
    rpcresult = self.rpc.call_export_list(self.nodes)
9185
    result = {}
9186
    for node in rpcresult:
9187
      if rpcresult[node].fail_msg:
9188
        result[node] = False
9189
      else:
9190
        result[node] = rpcresult[node].payload
9191

    
9192
    return result


class LUPrepareExport(NoHooksLU):
9196
  """Prepares an instance for an export and returns useful information.
9197

9198
  """
9199
  _OP_PARAMS = [
9200
    _PInstanceName,
9201
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9202
    ]
9203
  REQ_BGL = False
9204

    
9205
  def ExpandNames(self):
9206
    self._ExpandAndLockInstance()
9207

    
9208
  def CheckPrereq(self):
9209
    """Check prerequisites.
9210

9211
    """
9212
    instance_name = self.op.instance_name
9213

    
9214
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9215
    assert self.instance is not None, \
9216
          "Cannot retrieve locked instance %s" % self.op.instance_name
9217
    _CheckNodeOnline(self, self.instance.primary_node)
9218

    
9219
    self._cds = _GetClusterDomainSecret()
9220

    
9221
  def Exec(self, feedback_fn):
9222
    """Prepares an instance for an export.
9223

9224
    """
9225
    instance = self.instance
9226

    
9227
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9228
      salt = utils.GenerateSecret(8)
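      # the key name returned below is authenticated with an HMAC keyed on the
      # cluster domain secret; LUExportInstance verifies it before using it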
9229

    
9230
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9231
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9232
                                              constants.RIE_CERT_VALIDITY)
9233
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9234

    
9235
      (name, cert_pem) = result.payload
9236

    
9237
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9238
                                             cert_pem)
9239

    
9240
      return {
9241
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9242
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9243
                          salt),
9244
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9245
        }
9246

    
9247
    return None


class LUExportInstance(LogicalUnit):
9251
  """Export an instance to an image in the cluster.
9252

9253
  """
9254
  HPATH = "instance-export"
9255
  HTYPE = constants.HTYPE_INSTANCE
9256
  _OP_PARAMS = [
9257
    _PInstanceName,
9258
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9259
    ("shutdown", True, _TBool),
9260
    _PShutdownTimeout,
9261
    ("remove_instance", False, _TBool),
9262
    ("ignore_remove_failures", False, _TBool),
9263
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9264
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9265
    ("destination_x509_ca", None, _TMaybeString),
9266
    ]
9267
  REQ_BGL = False
9268

    
9269
  def CheckArguments(self):
9270
    """Check the arguments.
9271

9272
    """
9273
    self.x509_key_name = self.op.x509_key_name
9274
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9275

    
9276
    if self.op.remove_instance and not self.op.shutdown:
9277
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down before", errors.ECODE_INVAL)
9279

    
9280
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9281
      if not self.x509_key_name:
9282
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9283
                                   errors.ECODE_INVAL)
9284

    
9285
      if not self.dest_x509_ca_pem:
9286
        raise errors.OpPrereqError("Missing destination X509 CA",
9287
                                   errors.ECODE_INVAL)
9288

    
9289
  def ExpandNames(self):
9290
    self._ExpandAndLockInstance()
9291

    
9292
    # Lock all nodes for local exports
9293
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9294
      # FIXME: lock only instance primary and destination node
9295
      #
9296
      # Sad but true, for now we have to lock all nodes, as we don't know where
9297
      # the previous export might be, and in this LU we search for it and
9298
      # remove it from its current node. In the future we could fix this by:
9299
      #  - making a tasklet to search (share-lock all), then create the
9300
      #    new one, then one to remove, after
9301
      #  - removing the removal operation altogether
9302
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9303

    
9304
  def DeclareLocks(self, level):
9305
    """Last minute lock declaration."""
9306
    # All nodes are locked anyway, so nothing to do here.
9307

    
9308
  def BuildHooksEnv(self):
9309
    """Build hooks env.
9310

9311
    This will run on the master, primary node and target node.
9312

9313
    """
9314
    env = {
9315
      "EXPORT_MODE": self.op.mode,
9316
      "EXPORT_NODE": self.op.target_node,
9317
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9318
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9319
      # TODO: Generic function for boolean env variables
9320
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9321
      }
9322

    
9323
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9324

    
9325
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9326

    
9327
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9328
      nl.append(self.op.target_node)
9329

    
9330
    return env, nl, nl
9331

    
9332
  def CheckPrereq(self):
9333
    """Check prerequisites.
9334

9335
    This checks that the instance and node names are valid.
9336

9337
    """
9338
    instance_name = self.op.instance_name
9339

    
9340
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9341
    assert self.instance is not None, \
9342
          "Cannot retrieve locked instance %s" % self.op.instance_name
9343
    _CheckNodeOnline(self, self.instance.primary_node)
9344

    
9345
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9346
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9347
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9348
      assert self.dst_node is not None
9349

    
9350
      _CheckNodeOnline(self, self.dst_node.name)
9351
      _CheckNodeNotDrained(self, self.dst_node.name)
9352

    
9353
      self._cds = None
9354
      self.dest_disk_info = None
9355
      self.dest_x509_ca = None
9356

    
9357
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9358
      self.dst_node = None
9359

    
9360
      if len(self.op.target_node) != len(self.instance.disks):
9361
        raise errors.OpPrereqError(("Received destination information for %s"
9362
                                    " disks, but instance %s has %s disks") %
9363
                                   (len(self.op.target_node), instance_name,
9364
                                    len(self.instance.disks)),
9365
                                   errors.ECODE_INVAL)
9366

    
9367
      cds = _GetClusterDomainSecret()
9368

    
9369
      # Check X509 key name
9370
      try:
9371
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9372
      except (TypeError, ValueError), err:
9373
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9374

    
9375
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9376
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9377
                                   errors.ECODE_INVAL)
9378

    
9379
      # Load and verify CA
9380
      try:
9381
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9382
      except OpenSSL.crypto.Error, err:
9383
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9384
                                   (err, ), errors.ECODE_INVAL)
9385

    
9386
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9387
      if errcode is not None:
9388
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9389
                                   (msg, ), errors.ECODE_INVAL)
9390

    
9391
      self.dest_x509_ca = cert
9392

    
9393
      # Verify target information
9394
      disk_info = []
9395
      for idx, disk_data in enumerate(self.op.target_node):
9396
        try:
9397
          (host, port, magic) = \
9398
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9399
        except errors.GenericError, err:
9400
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9401
                                     (idx, err), errors.ECODE_INVAL)
9402

    
9403
        disk_info.append((host, port, magic))
9404

    
9405
      assert len(disk_info) == len(self.op.target_node)
9406
      self.dest_disk_info = disk_info
9407

    
9408
    else:
9409
      raise errors.ProgrammerError("Unhandled export mode %r" %
9410
                                   self.op.mode)
9411

    
9412
    # instance disk type verification
9413
    # TODO: Implement export support for file-based disks
9414
    for disk in self.instance.disks:
9415
      if disk.dev_type == constants.LD_FILE:
9416
        raise errors.OpPrereqError("Export not supported for instances with"
9417
                                   " file-based disks", errors.ECODE_INVAL)
9418

    
9419
  def _CleanupExports(self, feedback_fn):
9420
    """Removes exports of current instance from all other nodes.
9421

9422
    If an instance in a cluster with nodes A..D was exported to node C, its
9423
    exports will be removed from the nodes A, B and D.
9424

9425
    """
9426
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9427

    
9428
    nodelist = self.cfg.GetNodeList()
9429
    nodelist.remove(self.dst_node.name)
9430

    
9431
    # on one-node clusters nodelist will be empty after the removal
9432
    # if we proceed the backup would be removed because OpQueryExports
9433
    # substitutes an empty list with the full cluster node list.
9434
    iname = self.instance.name
9435
    if nodelist:
9436
      feedback_fn("Removing old exports for instance %s" % iname)
9437
      exportlist = self.rpc.call_export_list(nodelist)
9438
      for node in exportlist:
9439
        if exportlist[node].fail_msg:
9440
          continue
9441
        if iname in exportlist[node].payload:
9442
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9443
          if msg:
9444
            self.LogWarning("Could not remove older export for instance %s"
9445
                            " on node %s: %s", iname, node, msg)
9446

    
9447
  def Exec(self, feedback_fn):
9448
    """Export an instance to an image in the cluster.
9449

9450
    """
9451
    assert self.op.mode in constants.EXPORT_MODES
9452

    
9453
    instance = self.instance
9454
    src_node = instance.primary_node
9455

    
9456
    if self.op.shutdown:
9457
      # shutdown the instance, but not the disks
9458
      feedback_fn("Shutting down instance %s" % instance.name)
9459
      result = self.rpc.call_instance_shutdown(src_node, instance,
9460
                                               self.op.shutdown_timeout)
9461
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9462
      result.Raise("Could not shutdown instance %s on"
9463
                   " node %s" % (instance.name, src_node))
9464

    
9465
    # set the disks ID correctly since call_instance_start needs the
9466
    # correct drbd minor to create the symlinks
9467
    for disk in instance.disks:
9468
      self.cfg.SetDiskID(disk, src_node)
9469

    
9470
    activate_disks = (not instance.admin_up)
9471

    
9472
    if activate_disks:
9473
      # Activate the instance disks if we're exporting a stopped instance
9474
      feedback_fn("Activating disks for %s" % instance.name)
9475
      _StartInstanceDisks(self, instance, None)
9476

    
9477
    try:
9478
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9479
                                                     instance)
9480

    
9481
      helper.CreateSnapshots()
9482
      try:
9483
        if (self.op.shutdown and instance.admin_up and
9484
            not self.op.remove_instance):
9485
          assert not activate_disks
9486
          feedback_fn("Starting instance %s" % instance.name)
9487
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9488
          msg = result.fail_msg
9489
          if msg:
9490
            feedback_fn("Failed to start instance: %s" % msg)
9491
            _ShutdownInstanceDisks(self, instance)
9492
            raise errors.OpExecError("Could not start instance: %s" % msg)
9493

    
9494
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9495
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9496
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9497
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9498
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9499

    
9500
          (key_name, _, _) = self.x509_key_name
9501

    
9502
          dest_ca_pem = \
9503
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9504
                                            self.dest_x509_ca)
9505

    
9506
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9507
                                                     key_name, dest_ca_pem,
9508
                                                     timeouts)
9509
      finally:
9510
        helper.Cleanup()
9511

    
9512
      # Check for backwards compatibility
9513
      assert len(dresults) == len(instance.disks)
9514
      assert compat.all(isinstance(i, bool) for i in dresults), \
9515
             "Not all results are boolean: %r" % dresults
9516

    
9517
    finally:
9518
      if activate_disks:
9519
        feedback_fn("Deactivating disks for %s" % instance.name)
9520
        _ShutdownInstanceDisks(self, instance)
9521

    
9522
    if not (compat.all(dresults) and fin_resu):
9523
      failures = []
9524
      if not fin_resu:
9525
        failures.append("export finalization")
9526
      if not compat.all(dresults):
9527
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9528
                               if not dsk)
9529
        failures.append("disk export: disk(s) %s" % fdsk)
9530

    
9531
      raise errors.OpExecError("Export failed, errors in %s" %
9532
                               utils.CommaJoin(failures))
9533

    
9534
    # At this point, the export was successful, we can cleanup/finish
9535

    
9536
    # Remove instance if requested
9537
    if self.op.remove_instance:
9538
      feedback_fn("Removing instance %s" % instance.name)
9539
      _RemoveInstance(self, feedback_fn, instance,
9540
                      self.op.ignore_remove_failures)
9541

    
9542
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9543
      self._CleanupExports(feedback_fn)
9544

    
9545
    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
9549
  """Remove exports related to the named instance.
9550

9551
  """
9552
  _OP_PARAMS = [
9553
    _PInstanceName,
9554
    ]
9555
  REQ_BGL = False
9556

    
9557
  def ExpandNames(self):
9558
    self.needed_locks = {}
9559
    # We need all nodes to be locked in order for RemoveExport to work, but we
9560
    # don't need to lock the instance itself, as nothing will happen to it (and
9561
    # we can remove exports also for a removed instance)
9562
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9563

    
9564
  def Exec(self, feedback_fn):
9565
    """Remove any export.
9566

9567
    """
9568
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9569
    # If the instance was not found we'll try with the name that was passed in.
9570
    # This will only work if it was an FQDN, though.
9571
    fqdn_warn = False
9572
    if not instance_name:
9573
      fqdn_warn = True
9574
      instance_name = self.op.instance_name
9575

    
9576
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9577
    exportlist = self.rpc.call_export_list(locked_nodes)
9578
    found = False
9579
    for node in exportlist:
9580
      msg = exportlist[node].fail_msg
9581
      if msg:
9582
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9583
        continue
9584
      if instance_name in exportlist[node].payload:
9585
        found = True
9586
        result = self.rpc.call_export_remove(node, instance_name)
9587
        msg = result.fail_msg
9588
        if msg:
9589
          logging.error("Could not remove export for instance %s"
9590
                        " on node %s: %s", instance_name, node, msg)
9591

    
9592
    if fqdn_warn and not found:
9593
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9599
  """Generic tags LU.
9600

9601
  This is an abstract class which is the parent of all the other tags LUs.
9602

9603
  """
9604

    
9605
  def ExpandNames(self):
9606
    self.needed_locks = {}
9607
    if self.op.kind == constants.TAG_NODE:
9608
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9609
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9610
    elif self.op.kind == constants.TAG_INSTANCE:
9611
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9612
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9613

    
9614
  def CheckPrereq(self):
9615
    """Check prerequisites.
9616

9617
    """
9618
    if self.op.kind == constants.TAG_CLUSTER:
9619
      self.target = self.cfg.GetClusterInfo()
9620
    elif self.op.kind == constants.TAG_NODE:
9621
      self.target = self.cfg.GetNodeInfo(self.op.name)
9622
    elif self.op.kind == constants.TAG_INSTANCE:
9623
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9624
    else:
9625
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9626
                                 str(self.op.kind), errors.ECODE_INVAL)
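  # Explanatory sketch (added): the (kind, name) pair of the opcode selects
  # the tagged object.  For example, a hypothetical opcode with
  # kind=constants.TAG_NODE and name="node1.example.com" makes self.target
  # the corresponding node object, while kind=constants.TAG_CLUSTER ignores
  # the name and targets the cluster configuration itself.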
9627

    
9628

    
9629
class LUGetTags(TagsLU):
9630
  """Returns the tags of a given object.
9631

9632
  """
9633
  _OP_PARAMS = [
9634
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9635
    ("name", _NoDefault, _TNonEmptyString),
9636
    ]
9637
  REQ_BGL = False
9638

    
9639
  def Exec(self, feedback_fn):
9640
    """Returns the tag list.
9641

9642
    """
9643
    return list(self.target.GetTags())
9644

    
9645

    
9646
class LUSearchTags(NoHooksLU):
9647
  """Searches the tags for a given pattern.
9648

9649
  """
9650
  _OP_PARAMS = [
9651
    ("pattern", _NoDefault, _TNonEmptyString),
9652
    ]
9653
  REQ_BGL = False
9654

    
9655
  def ExpandNames(self):
9656
    self.needed_locks = {}
9657

    
9658
  def CheckPrereq(self):
9659
    """Check prerequisites.
9660

9661
    This checks the pattern passed for validity by compiling it.
9662

9663
    """
9664
    try:
9665
      self.re = re.compile(self.op.pattern)
9666
    except re.error, err:
9667
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9668
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9669

    
9670
  def Exec(self, feedback_fn):
9671
    """Returns the tag list.
9672

9673
    """
9674
    cfg = self.cfg
9675
    tgts = [("/cluster", cfg.GetClusterInfo())]
9676
    ilist = cfg.GetAllInstancesInfo().values()
9677
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9678
    nlist = cfg.GetAllNodesInfo().values()
9679
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9680
    results = []
9681
    for path, target in tgts:
9682
      for tag in target.GetTags():
9683
        if self.re.search(tag):
9684
          results.append((path, tag))
9685
    return results
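    # Illustrative example (hypothetical tags and names): with a cluster
    # tag "ha" and an instance "web1.example.com" tagged "ha:primary",
    # running this LU with pattern "^ha" would return roughly:
    #
    #   [("/cluster", "ha"),
    #    ("/instances/web1.example.com", "ha:primary")]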
9686

    
9687

    
9688
class LUAddTags(TagsLU):
9689
  """Sets a tag on a given object.
9690

9691
  """
9692
  _OP_PARAMS = [
9693
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9694
    ("name", _NoDefault, _TNonEmptyString),
9695
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9696
    ]
9697
  REQ_BGL = False
9698

    
9699
  def CheckPrereq(self):
9700
    """Check prerequisites.
9701

9702
    This checks the type and length of the tag name and value.
9703

9704
    """
9705
    TagsLU.CheckPrereq(self)
9706
    for tag in self.op.tags:
9707
      objects.TaggableObject.ValidateTag(tag)
9708

    
9709
  def Exec(self, feedback_fn):
9710
    """Sets the tag.
9711

9712
    """
9713
    try:
9714
      for tag in self.op.tags:
9715
        self.target.AddTag(tag)
9716
    except errors.TagError, err:
9717
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9718
    self.cfg.Update(self.target, feedback_fn)
9719

    
9720

    
9721
class LUDelTags(TagsLU):
9722
  """Delete a list of tags from a given object.
9723

9724
  """
9725
  _OP_PARAMS = [
9726
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9727
    ("name", _NoDefault, _TNonEmptyString),
9728
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9729
    ]
9730
  REQ_BGL = False
9731

    
9732
  def CheckPrereq(self):
9733
    """Check prerequisites.
9734

9735
    This checks that we have the given tag.
9736

9737
    """
9738
    TagsLU.CheckPrereq(self)
9739
    for tag in self.op.tags:
9740
      objects.TaggableObject.ValidateTag(tag)
9741
    del_tags = frozenset(self.op.tags)
9742
    cur_tags = self.target.GetTags()
9743
    if not del_tags <= cur_tags:
9744
      diff_tags = del_tags - cur_tags
9745
      diff_names = ["'%s'" % tag for tag in diff_tags]
9746
      diff_names.sort()
9747
      raise errors.OpPrereqError("Tag(s) %s not found" %
9748
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9749

    
9750
  def Exec(self, feedback_fn):
9751
    """Remove the tag from the object.
9752

9753
    """
9754
    for tag in self.op.tags:
9755
      self.target.RemoveTag(tag)
9756
    self.cfg.Update(self.target, feedback_fn)
9757

    
9758

    
9759
class LUTestDelay(NoHooksLU):
9760
  """Sleep for a specified amount of time.
9761

9762
  This LU sleeps on the master and/or nodes for a specified amount of
9763
  time.
9764

9765
  """
9766
  _OP_PARAMS = [
9767
    ("duration", _NoDefault, _TFloat),
9768
    ("on_master", True, _TBool),
9769
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9770
    ("repeat", 0, _TPositiveInt)
9771
    ]
9772
  REQ_BGL = False
9773

    
9774
  def ExpandNames(self):
9775
    """Expand names and set required locks.
9776

9777
    This expands the node list, if any.
9778

9779
    """
9780
    self.needed_locks = {}
9781
    if self.op.on_nodes:
9782
      # _GetWantedNodes can be used here, but is not always appropriate to use
9783
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9784
      # more information.
9785
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9786
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9787

    
9788
  def _TestDelay(self):
9789
    """Do the actual sleep.
9790

9791
    """
9792
    if self.op.on_master:
9793
      if not utils.TestDelay(self.op.duration):
9794
        raise errors.OpExecError("Error during master delay test")
9795
    if self.op.on_nodes:
9796
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9797
      for node, node_result in result.items():
9798
        node_result.Raise("Failure during rpc call to node %s" % node)
9799

    
9800
  def Exec(self, feedback_fn):
9801
    """Execute the test delay opcode, with the wanted repetitions.
9802

9803
    """
9804
    if self.op.repeat == 0:
9805
      self._TestDelay()
9806
    else:
9807
      top_value = self.op.repeat - 1
9808
      for i in range(self.op.repeat):
9809
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9810
        self._TestDelay()
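  # Usage sketch (illustrative, not part of the original module): this LU is
  # normally driven by the test-delay opcode, e.g. through the "gnt-debug
  # delay" front-end (assumed here).  With repeat=3 and duration=2.0 the
  # loop above sleeps three times for two seconds each, logging the
  # iterations as 0/2, 1/2 and 2/2; with repeat=0 it sleeps exactly once.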
9811

    
9812

    
9813
class IAllocator(object):
9814
  """IAllocator framework.
9815

9816
  An IAllocator instance has four sets of attributes:
9817
    - cfg that is needed to query the cluster
9818
    - input data (all members of the _KEYS class attribute are required)
9819
    - four buffer attributes (in_data, in_text, out_data, out_text), that
9820
      represent the input (to the external script) in text and data
9821
      structure format, and the output from it, again in two formats
9822
    - the result variables from the script (success, info, result) for
9823
      easy usage
9824

9825
  """
9826
  # pylint: disable-msg=R0902
9827
  # lots of instance attributes
9828
  _ALLO_KEYS = [
9829
    "name", "mem_size", "disks", "disk_template",
9830
    "os", "tags", "nics", "vcpus", "hypervisor",
9831
    ]
9832
  _RELO_KEYS = [
9833
    "name", "relocate_from",
9834
    ]
9835
  _EVAC_KEYS = [
9836
    "evac_nodes",
9837
    ]
9838

    
9839
  def __init__(self, cfg, rpc, mode, **kwargs):
9840
    self.cfg = cfg
9841
    self.rpc = rpc
9842
    # init buffer variables
9843
    self.in_text = self.out_text = self.in_data = self.out_data = None
9844
    # init all input fields so that pylint is happy
9845
    self.mode = mode
9846
    self.mem_size = self.disks = self.disk_template = None
9847
    self.os = self.tags = self.nics = self.vcpus = None
9848
    self.hypervisor = None
9849
    self.relocate_from = None
9850
    self.name = None
9851
    self.evac_nodes = None
9852
    # computed fields
9853
    self.required_nodes = None
9854
    # init result fields
9855
    self.success = self.info = self.result = None
9856
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9857
      keyset = self._ALLO_KEYS
9858
      fn = self._AddNewInstance
9859
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9860
      keyset = self._RELO_KEYS
9861
      fn = self._AddRelocateInstance
9862
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9863
      keyset = self._EVAC_KEYS
9864
      fn = self._AddEvacuateNodes
9865
    else:
9866
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9867
                                   " IAllocator" % self.mode)
9868
    for key in kwargs:
9869
      if key not in keyset:
9870
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
9871
                                     " IAllocator" % key)
9872
      setattr(self, key, kwargs[key])
9873

    
9874
    for key in keyset:
9875
      if key not in kwargs:
9876
        raise errors.ProgrammerError("Missing input parameter '%s' to"
9877
                                     " IAllocator" % key)
9878
    self._BuildInputData(fn)
9879

    
9880
  def _ComputeClusterData(self):
9881
    """Compute the generic allocator input data.
9882

9883
    This is the data that is independent of the actual operation.
9884

9885
    """
9886
    cfg = self.cfg
9887
    cluster_info = cfg.GetClusterInfo()
9888
    # cluster data
9889
    data = {
9890
      "version": constants.IALLOCATOR_VERSION,
9891
      "cluster_name": cfg.GetClusterName(),
9892
      "cluster_tags": list(cluster_info.GetTags()),
9893
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9894
      # we don't have job IDs
9895
      }
9896
    iinfo = cfg.GetAllInstancesInfo().values()
9897
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9898

    
9899
    # node data
9900
    node_results = {}
9901
    node_list = cfg.GetNodeList()
9902

    
9903
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9904
      hypervisor_name = self.hypervisor
9905
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9906
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9907
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9908
      hypervisor_name = cluster_info.enabled_hypervisors[0]
9909

    
9910
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9911
                                        hypervisor_name)
9912
    node_iinfo = \
9913
      self.rpc.call_all_instances_info(node_list,
9914
                                       cluster_info.enabled_hypervisors)
9915
    for nname, nresult in node_data.items():
9916
      # first fill in static (config-based) values
9917
      ninfo = cfg.GetNodeInfo(nname)
9918
      pnr = {
9919
        "tags": list(ninfo.GetTags()),
9920
        "primary_ip": ninfo.primary_ip,
9921
        "secondary_ip": ninfo.secondary_ip,
9922
        "offline": ninfo.offline,
9923
        "drained": ninfo.drained,
9924
        "master_candidate": ninfo.master_candidate,
9925
        }
9926

    
9927
      if not (ninfo.offline or ninfo.drained):
9928
        nresult.Raise("Can't get data for node %s" % nname)
9929
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9930
                                nname)
9931
        remote_info = nresult.payload
9932

    
9933
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9934
                     'vg_size', 'vg_free', 'cpu_total']:
9935
          if attr not in remote_info:
9936
            raise errors.OpExecError("Node '%s' didn't return attribute"
9937
                                     " '%s'" % (nname, attr))
9938
          if not isinstance(remote_info[attr], int):
9939
            raise errors.OpExecError("Node '%s' returned invalid value"
9940
                                     " for '%s': %s" %
9941
                                     (nname, attr, remote_info[attr]))
9942
        # compute memory used by primary instances
9943
        i_p_mem = i_p_up_mem = 0
9944
        for iinfo, beinfo in i_list:
9945
          if iinfo.primary_node == nname:
9946
            i_p_mem += beinfo[constants.BE_MEMORY]
9947
            if iinfo.name not in node_iinfo[nname].payload:
9948
              i_used_mem = 0
9949
            else:
9950
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9951
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9952
            remote_info['memory_free'] -= max(0, i_mem_diff)
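            # Explanatory note (added): the node reports its free memory
            # with the instance at its *current* size, so the subtraction
            # above re-reserves the difference.  For example, an instance
            # with BE_MEMORY=1024 MiB that is currently using only 512 MiB
            # causes 512 MiB to be taken out of the reported free memory.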
9953

    
9954
            if iinfo.admin_up:
9955
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9956

    
9957
        # dynamic node data: totals plus the instance memory computed above
9958
        pnr_dyn = {
9959
          "total_memory": remote_info['memory_total'],
9960
          "reserved_memory": remote_info['memory_dom0'],
9961
          "free_memory": remote_info['memory_free'],
9962
          "total_disk": remote_info['vg_size'],
9963
          "free_disk": remote_info['vg_free'],
9964
          "total_cpus": remote_info['cpu_total'],
9965
          "i_pri_memory": i_p_mem,
9966
          "i_pri_up_memory": i_p_up_mem,
9967
          }
9968
        pnr.update(pnr_dyn)
9969

    
9970
      node_results[nname] = pnr
9971
    data["nodes"] = node_results
9972

    
9973
    # instance data
9974
    instance_data = {}
9975
    for iinfo, beinfo in i_list:
9976
      nic_data = []
9977
      for nic in iinfo.nics:
9978
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9979
        nic_dict = {"mac": nic.mac,
9980
                    "ip": nic.ip,
9981
                    "mode": filled_params[constants.NIC_MODE],
9982
                    "link": filled_params[constants.NIC_LINK],
9983
                   }
9984
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9985
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9986
        nic_data.append(nic_dict)
9987
      pir = {
9988
        "tags": list(iinfo.GetTags()),
9989
        "admin_up": iinfo.admin_up,
9990
        "vcpus": beinfo[constants.BE_VCPUS],
9991
        "memory": beinfo[constants.BE_MEMORY],
9992
        "os": iinfo.os,
9993
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9994
        "nics": nic_data,
9995
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9996
        "disk_template": iinfo.disk_template,
9997
        "hypervisor": iinfo.hypervisor,
9998
        }
9999
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10000
                                                 pir["disks"])
10001
      instance_data[iinfo.name] = pir
10002

    
10003
    data["instances"] = instance_data
10004

    
10005
    self.in_data = data
10006

    
10007
  def _AddNewInstance(self):
10008
    """Add new instance data to allocator structure.
10009

10010
    This in combination with _ComputeClusterData will create the
10011
    correct structure needed as input for the allocator.
10012

10013
    The checks for the completeness of the opcode must have already been
10014
    done.
10015

10016
    """
10017
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10018

    
10019
    if self.disk_template in constants.DTS_NET_MIRROR:
10020
      self.required_nodes = 2
10021
    else:
10022
      self.required_nodes = 1
10023
    request = {
10024
      "name": self.name,
10025
      "disk_template": self.disk_template,
10026
      "tags": self.tags,
10027
      "os": self.os,
10028
      "vcpus": self.vcpus,
10029
      "memory": self.mem_size,
10030
      "disks": self.disks,
10031
      "disk_space_total": disk_space,
10032
      "nics": self.nics,
10033
      "required_nodes": self.required_nodes,
10034
      }
10035
    return request
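    # Illustrative request (hypothetical values, abbreviated): for a
    # two-disk DRBD instance the dict built above would look roughly like:
    #
    #   {"name": "inst1.example.com",
    #    "disk_template": "drbd",
    #    "tags": [],
    #    "os": "debootstrap+default",
    #    "vcpus": 1,
    #    "memory": 512,
    #    "disks": [{"size": 1024, "mode": "w"}, {"size": 512, "mode": "w"}],
    #    "disk_space_total": 1792,
    #    "nics": [{"mac": "auto", "ip": None, "bridge": None}],
    #    "required_nodes": 2,
    #   }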
10036

    
10037
  def _AddRelocateInstance(self):
10038
    """Add relocate instance data to allocator structure.
10039

10040
    This in combination with _ComputeClusterData will create the
10041
    correct structure needed as input for the allocator.
10042

10043
    The checks for the completeness of the opcode must have already been
10044
    done.
10045

10046
    """
10047
    instance = self.cfg.GetInstanceInfo(self.name)
10048
    if instance is None:
10049
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10050
                                   " IAllocator" % self.name)
10051

    
10052
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10053
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10054
                                 errors.ECODE_INVAL)
10055

    
10056
    if len(instance.secondary_nodes) != 1:
10057
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10058
                                 errors.ECODE_STATE)
10059

    
10060
    self.required_nodes = 1
10061
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10062
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10063

    
10064
    request = {
10065
      "name": self.name,
10066
      "disk_space_total": disk_space,
10067
      "required_nodes": self.required_nodes,
10068
      "relocate_from": self.relocate_from,
10069
      }
10070
    return request
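    # Illustrative request (hypothetical values): relocating a DRBD instance
    # away from its current secondary produces a much smaller dict, e.g.:
    #
    #   {"name": "inst1.example.com",
    #    "disk_space_total": 1152,
    #    "required_nodes": 1,
    #    "relocate_from": ["node3.example.com"],
    #   }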
10071

    
10072
  def _AddEvacuateNodes(self):
10073
    """Add evacuate nodes data to allocator structure.
10074

10075
    """
10076
    request = {
10077
      "evac_nodes": self.evac_nodes
10078
      }
10079
    return request
10080

    
10081
  def _BuildInputData(self, fn):
10082
    """Build input data structures.
10083

10084
    """
10085
    self._ComputeClusterData()
10086

    
10087
    request = fn()
10088
    request["type"] = self.mode
10089
    self.in_data["request"] = request
10090

    
10091
    self.in_text = serializer.Dump(self.in_data)
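    # Overall shape of the serialized input (illustrative and heavily
    # abbreviated):
    #
    #   {"version": 2,
    #    "cluster_name": "cluster.example.com",
    #    "cluster_tags": [],
    #    "enabled_hypervisors": ["xen-pvm"],
    #    "nodes": {"node1.example.com": {...}, "node2.example.com": {...}},
    #    "instances": {"inst1.example.com": {...}},
    #    "request": {"type": "allocate", ...}}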
10092

    
10093
  def Run(self, name, validate=True, call_fn=None):
10094
    """Run an instance allocator and return the results.
10095

10096
    """
10097
    if call_fn is None:
10098
      call_fn = self.rpc.call_iallocator_runner
10099

    
10100
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10101
    result.Raise("Failure while running the iallocator script")
10102

    
10103
    self.out_text = result.payload
10104
    if validate:
10105
      self._ValidateResult()
10106

    
10107
  def _ValidateResult(self):
10108
    """Process the allocator results.
10109

10110
    This will process and if successful save the result in
10111
    self.out_data and the other parameters.
10112

10113
    """
10114
    try:
10115
      rdict = serializer.Load(self.out_text)
10116
    except Exception, err:
10117
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10118

    
10119
    if not isinstance(rdict, dict):
10120
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10121

    
10122
    # TODO: remove backwards compatibility in later versions
10123
    if "nodes" in rdict and "result" not in rdict:
10124
      rdict["result"] = rdict["nodes"]
10125
      del rdict["nodes"]
10126

    
10127
    for key in "success", "info", "result":
10128
      if key not in rdict:
10129
        raise errors.OpExecError("Can't parse iallocator results:"
10130
                                 " missing key '%s'" % key)
10131
      setattr(self, key, rdict[key])
10132

    
10133
    if not isinstance(rdict["result"], list):
10134
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10135
                               " is not a list")
10136
    self.out_data = rdict
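    # Example of a well-formed allocator reply (illustrative values) that
    # passes the checks above:
    #
    #   {"success": true,
    #    "info": "allocation successful",
    #    "result": ["node2.example.com", "node3.example.com"]}
    #
    # Replies from older scripts that return the list under "nodes" instead
    # of "result" are still accepted by the compatibility shim above.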
10137

    
10138

    
10139
class LUTestAllocator(NoHooksLU):
10140
  """Run allocator tests.
10141

10142
  This LU runs the allocator tests.
10143

10144
  """
10145
  _OP_PARAMS = [
10146
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10147
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10148
    ("name", _NoDefault, _TNonEmptyString),
10149
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10150
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10151
               _TOr(_TNone, _TNonEmptyString))))),
10152
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
10153
    ("hypervisor", None, _TMaybeString),
10154
    ("allocator", None, _TMaybeString),
10155
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10156
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10157
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10158
    ("os", None, _TMaybeString),
10159
    ("disk_template", None, _TMaybeString),
10160
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10161
    ]
10162

    
10163
  def CheckPrereq(self):
10164
    """Check prerequisites.
10165

10166
    This checks the opcode parameters depending on the test direction and mode.
10167

10168
    """
10169
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10170
      for attr in ["mem_size", "disks", "disk_template",
10171
                   "os", "tags", "nics", "vcpus"]:
10172
        if not hasattr(self.op, attr):
10173
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10174
                                     attr, errors.ECODE_INVAL)
10175
      iname = self.cfg.ExpandInstanceName(self.op.name)
10176
      if iname is not None:
10177
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10178
                                   iname, errors.ECODE_EXISTS)
10179
      if not isinstance(self.op.nics, list):
10180
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10181
                                   errors.ECODE_INVAL)
10182
      if not isinstance(self.op.disks, list):
10183
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10184
                                   errors.ECODE_INVAL)
10185
      for row in self.op.disks:
10186
        if (not isinstance(row, dict) or
10187
            "size" not in row or
10188
            not isinstance(row["size"], int) or
10189
            "mode" not in row or
10190
            row["mode"] not in ['r', 'w']):
10191
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10192
                                     " parameter", errors.ECODE_INVAL)
10193
      if self.op.hypervisor is None:
10194
        self.op.hypervisor = self.cfg.GetHypervisorType()
10195
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10196
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10197
      self.op.name = fname
10198
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10199
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10200
      if not hasattr(self.op, "evac_nodes"):
10201
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10202
                                   " opcode input", errors.ECODE_INVAL)
10203
    else:
10204
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10205
                                 self.op.mode, errors.ECODE_INVAL)
10206

    
10207
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10208
      if self.op.allocator is None:
10209
        raise errors.OpPrereqError("Missing allocator name",
10210
                                   errors.ECODE_INVAL)
10211
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10212
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10213
                                 self.op.direction, errors.ECODE_INVAL)
10214

    
10215
  def Exec(self, feedback_fn):
10216
    """Run the allocator test.
10217

10218
    """
10219
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10220
      ial = IAllocator(self.cfg, self.rpc,
10221
                       mode=self.op.mode,
10222
                       name=self.op.name,
10223
                       mem_size=self.op.mem_size,
10224
                       disks=self.op.disks,
10225
                       disk_template=self.op.disk_template,
10226
                       os=self.op.os,
10227
                       tags=self.op.tags,
10228
                       nics=self.op.nics,
10229
                       vcpus=self.op.vcpus,
10230
                       hypervisor=self.op.hypervisor,
10231
                       )
10232
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10233
      ial = IAllocator(self.cfg, self.rpc,
10234
                       mode=self.op.mode,
10235
                       name=self.op.name,
10236
                       relocate_from=list(self.relocate_from),
10237
                       )
10238
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10239
      ial = IAllocator(self.cfg, self.rpc,
10240
                       mode=self.op.mode,
10241
                       evac_nodes=self.op.evac_nodes)
10242
    else:
10243
      raise errors.ProgrammerError("Uncatched mode %s in"
10244
                                   " LUTestAllocator.Exec", self.op.mode)
10245

    
10246
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
10247
      result = ial.in_text
10248
    else:
10249
      ial.Run(self.op.allocator, validate=False)
10250
      result = ial.out_text
10251
    return result