lib/cmdlib.py @ b6dd40f5

#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have way too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs

def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


#: The without-default default value
_NoDefault = object()


#: The no-type (value too complex to check it in the type system)
_NoType = object()


# Some basic types
def _TNotNone(val):
  """Checks if the given value is not None.

  """
  return val is not None


def _TNone(val):
  """Checks if the given value is None.

  """
  return val is None


def _TBool(val):
  """Checks if the given value is a boolean.

  """
  return isinstance(val, bool)


def _TInt(val):
  """Checks if the given value is an integer.

  """
  return isinstance(val, int)


def _TFloat(val):
  """Checks if the given value is a float.

  """
  return isinstance(val, float)


def _TString(val):
  """Checks if the given value is a string.

  """
  return isinstance(val, basestring)


def _TTrue(val):
  """Checks if a given value evaluates to a boolean True value.

  """
  return bool(val)


def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list


# Container types
def _TList(val):
  """Checks if the given value is a list.

  """
  return isinstance(val, list)


def _TDict(val):
  """Checks if the given value is a dictionary.

  """
  return isinstance(val, dict)


# Combinator types
def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


# Type aliases

#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a maybe non-empty string
_TMaybeString = _TOr(_TNonEmptyString, _TNone)


#: a maybe boolean (bool or None)
_TMaybeBool = _TOr(_TBool, _TNone)


#: a non-negative integer
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)

#: a strictly positive integer
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)


def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(my_type(v) for v in lst))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
                                and compat.all(val_type(v)
                                               for v in my_dict.values())))


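# Illustrative example (not part of the original module): the checks above
# compose into compact validators.  A hypothetical parameter that may be
# None or must otherwise be a list of non-empty strings could be checked
# with:
#
#   _TMaybeStringList = _TOr(_TNone, _TListOf(_TNonEmptyString))
#   _TMaybeStringList(["node1.example.com"])   # -> True
#   _TMaybeStringList(None)                    # -> True
#   _TMaybeStringList("node1.example.com")     # -> False (not a list)
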
# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)

#: the force parameter
_PForce = ("force", False, _TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)


#: a required node name (for single-node LUs)
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)


# End types
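# Illustrative example (not part of the original module): a single-instance
# LU would typically combine the common attribute tuples above into its
# _OP_PARAMS list, e.g.:
#
#   class LUHypotheticalInstanceOp(LogicalUnit):   # hypothetical name
#     _OP_PARAMS = [
#       _PInstanceName,
#       _PForce,
#       _PShutdownTimeout,
#       ("ignore_failures", False, _TBool),
#     ]
#
# LogicalUnit.__init__ (below) fills in the default for every missing
# attribute and validates each value against the third element of its tuple.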
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and the types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == _NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == _NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which will then be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this is
    handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty list (and not None) should be returned if there are no
    nodes to run the hooks on.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and
    the previous result is passed back unchanged, but any LU can override it
    if it wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to check and expand
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is empty

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of a wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


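# Illustrative example (not part of the original module) of how
# _GetUpdatedParams merges parameter dictionaries:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/sda1", "serial_console": True}
#
# "kernel_path" is dropped (reset to its default), the new key is added,
# and untouched keys are kept as-is.
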
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @type selected: list
  @param selected: the list of output fields to check

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


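# Illustrative example (not part of the original module): with a static
# field set containing "name" and a dynamic set containing "oper_state",
# requesting ["name", "oper_state"] passes, while requesting
# ["name", "bogus"] raises OpPrereqError naming the unknown field "bogus".
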
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


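# Illustrative example (not part of the original module): for an instance
# with one bridged NIC and one disk, the dictionary returned above contains
# entries along the lines of
#
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
#   INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK, INSTANCE_NIC0_BRIDGE,
#   INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and hypervisor
# parameter; the hooks runner later prefixes every key with "GANETI_".
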
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


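# Worked example (illustrative, not part of the original module) for
# _DecideSelfPromotion above: with candidate_pool_size = 10, three current
# candidates and a desired count of three, adding the new node raises the
# desired count to min(3 + 1, 10) = 4; since 3 < 4, the node promotes itself.
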
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


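# Illustrative example (not part of the original module): for an OS that
# declares supported variants, a user-supplied name such as
# "debian-image+squeeze" is split on the first "+" and "squeeze" must then
# appear in os_obj.supported_variants, while a bare "debian-image" raises
# "OS name must include a variant".
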
def _GetNodeInstancesInner(cfg, fn):
  """Returns the instances for which fn(instance) is true."""
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of the instance's disks reported faulty on a node."""
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


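# Illustrative summary (not part of the original module) of the possible
# return values of _VerifyCertificate above:
#
#   (None, fnamemsg)                    - no problem found
#   (LUVerifyCluster.ETYPE_WARNING,
#    "While verifying <file>: <msg>")   - e.g. the certificate expires soon
#   (LUVerifyCluster.ETYPE_ERROR, ...)  - e.g. an expired certificate, or the
#                                         PEM file could not be loaded at all
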
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", _EmptyList,
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, _TBool),
    ("error_codes", False, _TBool),
    ("debug_simulate_errors", False, _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # only mark the operation as failed for ERROR cases, not for WARNING
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

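  # Illustrative example (not part of the original module): with
  # op.error_codes enabled, _Error(self.ENODELVM, "node1.example.com",
  # "unable to check volume groups") reports
  #
  #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  #
  # while without error_codes the same call reports
  #
  #   - ERROR: node node1.example.com: unable to check volume groups
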
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

1572
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)


  def _VerifyInstance(self, instance, instanceconfig, node_image):
1616
    """Verify an instance.
1617

1618
    This function checks to see if the required block devices are
1619
    available on the instance's node.
1620

1621
    """
1622
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1623
    node_current = instanceconfig.primary_node
1624

    
1625
    node_vol_should = {}
1626
    instanceconfig.MapLVsByNode(node_vol_should)
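    # node_vol_should now maps node name -> list of logical volumes the
    # instance is expected to have on that node, e.g. (hypothetical values):
    #   {"node1.example.com": ["xenvg/disk0_data", "xenvg/disk0_meta"]}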
1627

    
1628
    for node in node_vol_should:
1629
      n_img = node_image[node]
1630
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1631
        # ignore missing volumes on offline or broken nodes
1632
        continue
1633
      for volume in node_vol_should[node]:
1634
        test = volume not in n_img.volumes
1635
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1636
                 "volume %s missing on node %s", volume, node)
1637

    
1638
    if instanceconfig.admin_up:
1639
      pri_img = node_image[node_current]
1640
      test = instance not in pri_img.instances and not pri_img.offline
1641
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1642
               "instance not running on its primary node %s",
1643
               node_current)
1644

    
1645
    for node, n_img in node_image.items():
1646
      if node != node_current:
1647
        test = instance in n_img.instances
1648
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1649
                 "instance should not run on node %s", node)
1650

    
1651
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1652
    """Verify if there are any unknown volumes in the cluster.
1653

1654
    The .os, .swap and backup volumes are ignored. All other volumes are
1655
    reported as unknown.
1656

1657
    """
1658
    for node, n_img in node_image.items():
1659
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1660
        # skip non-healthy nodes
1661
        continue
1662
      for volume in n_img.volumes:
1663
        test = (node not in node_vol_should or
1664
                volume not in node_vol_should[node])
1665
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1666
                      "volume %s is unknown", volume)
1667

    
1668
  def _VerifyOrphanInstances(self, instancelist, node_image):
1669
    """Verify the list of running instances.
1670

1671
    This checks what instances are running but unknown to the cluster.
1672

1673
    """
1674
    for node, n_img in node_image.items():
1675
      for o_inst in n_img.instances:
1676
        test = o_inst not in instancelist
1677
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1678
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all the instances it would
      # have to take over should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1708
                       master_files):
1709
    """Verifies and computes the node required file checksums.
1710

1711
    @type ninfo: L{objects.Node}
1712
    @param ninfo: the node to check
1713
    @param nresult: the remote results for the node
1714
    @param file_list: required list of files
1715
    @param local_cksum: dictionary of local files and their checksums
1716
    @param master_files: list of files that only masters should have
1717

1718
    """
1719
    node = ninfo.name
1720
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1721

    
1722
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1723
    test = not isinstance(remote_cksum, dict)
1724
    _ErrorIf(test, self.ENODEFILECHECK, node,
1725
             "node hasn't returned file checksum data")
1726
    if test:
1727
      return
1728

    
1729
    for file_name in file_list:
1730
      node_is_mc = ninfo.master_candidate
1731
      must_have = (file_name not in master_files) or node_is_mc
1732
      # missing
1733
      test1 = file_name not in remote_cksum
1734
      # invalid checksum
1735
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1736
      # existing and good
1737
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
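      # Summary of the three mutually exclusive cases above: the file is
      # missing (test1), present but with a different checksum (test2), or
      # present with a matching checksum (test3); which of them are reported
      # as errors depends on whether this node must have the file (must_have).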
1738
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1739
               "file '%s' missing", file_name)
1740
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1741
               "file '%s' has wrong checksum", file_name)
1742
      # not candidate and this is not a must-have file
1743
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1744
               "file '%s' should not exist on non master"
1745
               " candidates (and the file is outdated)", file_name)
1746
      # all good, except non-master/non-must have combination
1747
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1748
               "file '%s' should not exist"
1749
               " on non master candidates", file_name)
1750

    
1751
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1752
                      drbd_map):
1753
    """Verifies and the node DRBD status.
1754

1755
    @type ninfo: L{objects.Node}
1756
    @param ninfo: the node to check
1757
    @param nresult: the remote results for the node
1758
    @param instanceinfo: the dict of instances
1759
    @param drbd_helper: the configured DRBD usermode helper
1760
    @param drbd_map: the DRBD map as returned by
1761
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1762

1763
    """
1764
    node = ninfo.name
1765
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1766

    
1767
    if drbd_helper:
1768
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1769
      test = (helper_result is None)
1770
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1771
               "no drbd usermode helper returned")
1772
      if helper_result:
1773
        status, payload = helper_result
1774
        test = not status
1775
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1776
                 "drbd usermode helper check unsuccessful: %s", payload)
1777
        test = status and (payload != drbd_helper)
1778
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1779
                 "wrong drbd usermode helper: %s", payload)
1780

    
1781
    # compute the DRBD minors
1782
    node_drbd = {}
1783
    for minor, instance in drbd_map[node].items():
1784
      test = instance not in instanceinfo
1785
      _ErrorIf(test, self.ECLUSTERCFG, None,
1786
               "ghost instance '%s' in temporary DRBD map", instance)
1787
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1790
      if test:
1791
        node_drbd[minor] = (instance, False)
1792
      else:
1793
        instance = instanceinfo[instance]
1794
        node_drbd[minor] = (instance.name, instance.admin_up)
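    # node_drbd now maps each reserved minor to an (instance_name, should_run)
    # pair, e.g. (hypothetical): {0: ("inst1.example.com", True)}. Ghost
    # instances are kept in the map with should_run=False, so the checks below
    # do not additionally flag their minors as unallocated.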
1795

    
1796
    # and now check them
1797
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1798
    test = not isinstance(used_minors, (tuple, list))
1799
    _ErrorIf(test, self.ENODEDRBD, node,
1800
             "cannot parse drbd status file: %s", str(used_minors))
1801
    if test:
1802
      # we cannot check drbd status
1803
      return
1804

    
1805
    for minor, (iname, must_exist) in node_drbd.items():
1806
      test = minor not in used_minors and must_exist
1807
      _ErrorIf(test, self.ENODEDRBD, node,
1808
               "drbd minor %d of instance %s is not active", minor, iname)
1809
    for minor in used_minors:
1810
      test = minor not in node_drbd
1811
      _ErrorIf(test, self.ENODEDRBD, node,
1812
               "unallocated drbd minor %d is in use", minor)
1813

    
1814
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1815
    """Builds the node OS structures.
1816

1817
    @type ninfo: L{objects.Node}
1818
    @param ninfo: the node to check
1819
    @param nresult: the remote results for the node
1820
    @param nimg: the node image object
1821

1822
    """
1823
    node = ninfo.name
1824
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1825

    
1826
    remote_os = nresult.get(constants.NV_OSLIST, None)
1827
    test = (not isinstance(remote_os, list) or
1828
            not compat.all(isinstance(v, list) and len(v) == 7
1829
                           for v in remote_os))
1830

    
1831
    _ErrorIf(test, self.ENODEOS, node,
1832
             "node hasn't returned valid OS data")
1833

    
1834
    nimg.os_fail = test
1835

    
1836
    if test:
1837
      return
1838

    
1839
    os_dict = {}
1840

    
1841
    for (name, os_path, status, diagnose,
1842
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1843

    
1844
      if name not in os_dict:
1845
        os_dict[name] = []
1846

    
1847
      # parameters is a list of lists instead of list of tuples due to
1848
      # JSON lacking a real tuple type, fix it:
1849
      parameters = [tuple(v) for v in parameters]
1850
      os_dict[name].append((os_path, status, diagnose,
1851
                            set(variants), set(parameters), set(api_ver)))
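      # each entry appended above is a (path, status, diagnose, variants,
      # parameters, api_versions) tuple; a hypothetical os_dict entry could
      # look like: {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
      # set(["default"]), set(), set([15]))]}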
1852

    
1853
    nimg.oslist = os_dict
1854

    
1855
  def _VerifyNodeOS(self, ninfo, nimg, base):
1856
    """Verifies the node OS list.
1857

1858
    @type ninfo: L{objects.Node}
1859
    @param ninfo: the node to check
1860
    @param nimg: the node image object
1861
    @param base: the 'template' node we match against (e.g. from the master)
1862

1863
    """
1864
    node = ninfo.name
1865
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1866

    
1867
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1868

    
1869
    for os_name, os_data in nimg.oslist.items():
1870
      assert os_data, "Empty OS status for OS %s?!" % os_name
1871
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1872
      _ErrorIf(not f_status, self.ENODEOS, node,
1873
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1874
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1875
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1876
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1877
      # this will be caught in the backend too
1878
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1879
               and not f_var, self.ENODEOS, node,
1880
               "OS %s with API at least %d does not declare any variant",
1881
               os_name, constants.OS_API_V15)
1882
      # comparisons with the 'base' image
1883
      test = os_name not in base.oslist
1884
      _ErrorIf(test, self.ENODEOS, node,
1885
               "Extra OS %s not present on reference node (%s)",
1886
               os_name, base.name)
1887
      if test:
1888
        continue
1889
      assert base.oslist[os_name], "Base node has empty OS status?"
1890
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1891
      if not b_status:
1892
        # base OS is invalid, skipping
1893
        continue
1894
      for kind, a, b in [("API version", f_api, b_api),
1895
                         ("variants list", f_var, b_var),
1896
                         ("parameters", f_param, b_param)]:
1897
        _ErrorIf(a != b, self.ENODEOS, node,
1898
                 "OS %s %s differs from reference node %s: %s vs. %s",
1899
                 kind, os_name, base.name,
1900
                 utils.CommaJoin(a), utils.CommaJoin(b))
1901

    
1902
    # check any missing OSes
1903
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1904
    _ErrorIf(missing, self.ENODEOS, node,
1905
             "OSes present on reference node %s but missing on this node: %s",
1906
             base.name, utils.CommaJoin(missing))
1907

    
1908
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1909
    """Verifies and updates the node volume data.
1910

1911
    This function will update a L{NodeImage}'s internal structures
1912
    with data from the remote call.
1913

1914
    @type ninfo: L{objects.Node}
1915
    @param ninfo: the node to check
1916
    @param nresult: the remote results for the node
1917
    @param nimg: the node image object
1918
    @param vg_name: the configured VG name
1919

1920
    """
1921
    node = ninfo.name
1922
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1923

    
1924
    nimg.lvm_fail = True
1925
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1926
    if vg_name is None:
1927
      pass
1928
    elif isinstance(lvdata, basestring):
1929
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1930
               utils.SafeEncode(lvdata))
1931
    elif not isinstance(lvdata, dict):
1932
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1933
    else:
1934
      nimg.volumes = lvdata
1935
      nimg.lvm_fail = False
1936

    
1937
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1938
    """Verifies and updates the node instance list.
1939

1940
    If the listing was successful, then updates this node's instance
1941
    list. Otherwise, it marks the RPC call as failed for the instance
1942
    list key.
1943

1944
    @type ninfo: L{objects.Node}
1945
    @param ninfo: the node to check
1946
    @param nresult: the remote results for the node
1947
    @param nimg: the node image object
1948

1949
    """
1950
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1951
    test = not isinstance(idata, list)
1952
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1953
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1954
    if test:
1955
      nimg.hyp_fail = True
1956
    else:
1957
      nimg.instances = idata
1958

    
1959
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1960
    """Verifies and computes a node information map
1961

1962
    @type ninfo: L{objects.Node}
1963
    @param ninfo: the node to check
1964
    @param nresult: the remote results for the node
1965
    @param nimg: the node image object
1966
    @param vg_name: the configured VG name
1967

1968
    """
1969
    node = ninfo.name
1970
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1971

    
1972
    # try to read free memory (from the hypervisor)
1973
    hv_info = nresult.get(constants.NV_HVINFO, None)
1974
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1975
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1976
    if not test:
1977
      try:
1978
        nimg.mfree = int(hv_info["memory_free"])
1979
      except (ValueError, TypeError):
1980
        _ErrorIf(True, self.ENODERPC, node,
1981
                 "node returned invalid nodeinfo, check hypervisor")
1982

    
1983
    # FIXME: devise a free space model for file based instances as well
1984
    if vg_name is not None:
1985
      test = (constants.NV_VGLIST not in nresult or
1986
              vg_name not in nresult[constants.NV_VGLIST])
1987
      _ErrorIf(test, self.ENODELVM, node,
1988
               "node didn't return data for the volume group '%s'"
1989
               " - it is either missing or broken", vg_name)
1990
      if not test:
1991
        try:
1992
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1993
        except (ValueError, TypeError):
1994
          _ErrorIf(True, self.ENODERPC, node,
1995
                   "node returned invalid LVM info, check LVM status")
1996

    
1997
  def BuildHooksEnv(self):
1998
    """Build hooks env.
1999

2000
    Cluster-Verify hooks are run only in the post phase; their failure causes
    the output to be logged in the verify output and the verification to fail.
2002

2003
    """
2004
    all_nodes = self.cfg.GetNodeList()
2005
    env = {
2006
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2007
      }
2008
    for node in self.cfg.GetAllNodesInfo().values():
2009
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2010

    
2011
    return env, [], all_nodes
2012

    
2013
  def Exec(self, feedback_fn):
2014
    """Verify integrity of cluster, performing various test on nodes.
2015

2016
    """
2017
    self.bad = False
2018
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2019
    verbose = self.op.verbose
2020
    self._feedback_fn = feedback_fn
2021
    feedback_fn("* Verifying global settings")
2022
    for msg in self.cfg.VerifyConfig():
2023
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2024

    
2025
    # Check the cluster certificates
2026
    for cert_filename in constants.ALL_CERT_FILES:
2027
      (errcode, msg) = _VerifyCertificate(cert_filename)
2028
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2029

    
2030
    vg_name = self.cfg.GetVGName()
2031
    drbd_helper = self.cfg.GetDRBDHelper()
2032
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2033
    cluster = self.cfg.GetClusterInfo()
2034
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2035
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2036
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2037
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2038
                        for iname in instancelist)
2039
    i_non_redundant = [] # Non redundant instances
2040
    i_non_a_balanced = [] # Non auto-balanced instances
2041
    n_offline = 0 # Count of offline nodes
2042
    n_drained = 0 # Count of nodes being drained
2043
    node_vol_should = {}
2044

    
2045
    # FIXME: verify OS list
2046
    # do local checksums
2047
    master_files = [constants.CLUSTER_CONF_FILE]
2048
    master_node = self.master_node = self.cfg.GetMasterNode()
2049
    master_ip = self.cfg.GetMasterIP()
2050

    
2051
    file_names = ssconf.SimpleStore().GetFileList()
2052
    file_names.extend(constants.ALL_CERT_FILES)
2053
    file_names.extend(master_files)
2054
    if cluster.modify_etc_hosts:
2055
      file_names.append(constants.ETC_HOSTS)
2056

    
2057
    local_checksums = utils.FingerprintFiles(file_names)
2058

    
2059
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2060
    node_verify_param = {
2061
      constants.NV_FILELIST: file_names,
2062
      constants.NV_NODELIST: [node.name for node in nodeinfo
2063
                              if not node.offline],
2064
      constants.NV_HYPERVISOR: hypervisors,
2065
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2066
                                  node.secondary_ip) for node in nodeinfo
2067
                                 if not node.offline],
2068
      constants.NV_INSTANCELIST: hypervisors,
2069
      constants.NV_VERSION: None,
2070
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2071
      constants.NV_NODESETUP: None,
2072
      constants.NV_TIME: None,
2073
      constants.NV_MASTERIP: (master_node, master_ip),
2074
      constants.NV_OSLIST: None,
2075
      }
2076

    
2077
    if vg_name is not None:
2078
      node_verify_param[constants.NV_VGLIST] = None
2079
      node_verify_param[constants.NV_LVLIST] = vg_name
2080
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2081
      node_verify_param[constants.NV_DRBDLIST] = None
2082

    
2083
    if drbd_helper:
2084
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2085

    
2086
    # Build our expected cluster state
2087
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2088
                                                 name=node.name))
2089
                      for node in nodeinfo)
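    # node_image now holds one NodeImage per configured node, keyed by node
    # name; instances referring to nodes missing from this map are later
    # attached to "ghost" NodeImage entries.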
2090

    
2091
    for instance in instancelist:
2092
      inst_config = instanceinfo[instance]
2093

    
2094
      for nname in inst_config.all_nodes:
2095
        if nname not in node_image:
2096
          # ghost node
2097
          gnode = self.NodeImage(name=nname)
2098
          gnode.ghost = True
2099
          node_image[nname] = gnode
2100

    
2101
      inst_config.MapLVsByNode(node_vol_should)
2102

    
2103
      pnode = inst_config.primary_node
2104
      node_image[pnode].pinst.append(instance)
2105

    
2106
      for snode in inst_config.secondary_nodes:
2107
        nimg = node_image[snode]
2108
        nimg.sinst.append(instance)
2109
        if pnode not in nimg.sbp:
2110
          nimg.sbp[pnode] = []
2111
        nimg.sbp[pnode].append(instance)
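      # nimg.sbp maps primary node name -> instances that have that node as
      # primary and this node as secondary, e.g. (hypothetical):
      # {"node1.example.com": ["inst1"]}; this is what the N+1 memory check
      # iterates over.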
2112

    
2113
    # At this point, we have the in-memory data structures complete,
2114
    # except for the runtime information, which we'll gather next
2115

    
2116
    # Due to the way our RPC system works, exact response times cannot be
2117
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2118
    # time before and after executing the request, we can at least have a time
2119
    # window.
2120
    nvinfo_starttime = time.time()
2121
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2122
                                           self.cfg.GetClusterName())
2123
    nvinfo_endtime = time.time()
2124

    
2125
    all_drbd_map = self.cfg.ComputeDRBDMap()
2126

    
2127
    feedback_fn("* Verifying node status")
2128

    
2129
    refos_img = None
2130

    
2131
    for node_i in nodeinfo:
2132
      node = node_i.name
2133
      nimg = node_image[node]
2134

    
2135
      if node_i.offline:
2136
        if verbose:
2137
          feedback_fn("* Skipping offline node %s" % (node,))
2138
        n_offline += 1
2139
        continue
2140

    
2141
      if node == master_node:
2142
        ntype = "master"
2143
      elif node_i.master_candidate:
2144
        ntype = "master candidate"
2145
      elif node_i.drained:
2146
        ntype = "drained"
2147
        n_drained += 1
2148
      else:
2149
        ntype = "regular"
2150
      if verbose:
2151
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2152

    
2153
      msg = all_nvinfo[node].fail_msg
2154
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2155
      if msg:
2156
        nimg.rpc_fail = True
2157
        continue
2158

    
2159
      nresult = all_nvinfo[node].payload
2160

    
2161
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2162
      self._VerifyNodeNetwork(node_i, nresult)
2163
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2164
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2165
                            master_files)
2166
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2167
                           all_drbd_map)
2168
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2169

    
2170
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2171
      self._UpdateNodeInstances(node_i, nresult, nimg)
2172
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2173
      self._UpdateNodeOS(node_i, nresult, nimg)
2174
      if not nimg.os_fail:
2175
        if refos_img is None:
2176
          refos_img = nimg
2177
        self._VerifyNodeOS(node_i, nimg, refos_img)
2178

    
2179
    feedback_fn("* Verifying instance status")
2180
    for instance in instancelist:
2181
      if verbose:
2182
        feedback_fn("* Verifying instance %s" % instance)
2183
      inst_config = instanceinfo[instance]
2184
      self._VerifyInstance(instance, inst_config, node_image)
2185
      inst_nodes_offline = []
2186

    
2187
      pnode = inst_config.primary_node
2188
      pnode_img = node_image[pnode]
2189
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2190
               self.ENODERPC, pnode, "instance %s, connection to"
2191
               " primary node failed", instance)
2192

    
2193
      if pnode_img.offline:
2194
        inst_nodes_offline.append(pnode)
2195

    
2196
      # If the instance is non-redundant we cannot survive losing its primary
2197
      # node, so we are not N+1 compliant. On the other hand we have no disk
2198
      # templates with more than one secondary so that situation is not well
2199
      # supported either.
2200
      # FIXME: does not support file-backed instances
2201
      if not inst_config.secondary_nodes:
2202
        i_non_redundant.append(instance)
2203
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2204
               instance, "instance has multiple secondary nodes: %s",
2205
               utils.CommaJoin(inst_config.secondary_nodes),
2206
               code=self.ETYPE_WARNING)
2207

    
2208
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2209
        i_non_a_balanced.append(instance)
2210

    
2211
      for snode in inst_config.secondary_nodes:
2212
        s_img = node_image[snode]
2213
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2214
                 "instance %s, connection to secondary node failed", instance)
2215

    
2216
        if s_img.offline:
2217
          inst_nodes_offline.append(snode)
2218

    
2219
      # warn that the instance lives on offline nodes
2220
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2221
               "instance lives on offline node(s) %s",
2222
               utils.CommaJoin(inst_nodes_offline))
2223
      # ... or ghost nodes
2224
      for node in inst_config.all_nodes:
2225
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2226
                 "instance lives on ghost node %s", node)
2227

    
2228
    feedback_fn("* Verifying orphan volumes")
2229
    self._VerifyOrphanVolumes(node_vol_should, node_image)
2230

    
2231
    feedback_fn("* Verifying orphan instances")
2232
    self._VerifyOrphanInstances(instancelist, node_image)
2233

    
2234
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2235
      feedback_fn("* Verifying N+1 Memory redundancy")
2236
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2237

    
2238
    feedback_fn("* Other Notes")
2239
    if i_non_redundant:
2240
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2241
                  % len(i_non_redundant))
2242

    
2243
    if i_non_a_balanced:
2244
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2245
                  % len(i_non_a_balanced))
2246

    
2247
    if n_offline:
2248
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2249

    
2250
    if n_drained:
2251
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2252

    
2253
    return not self.bad
2254

    
2255
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2256
    """Analyze the post-hooks' result
2257

2258
    This method analyses the hook result, handles it, and sends some
2259
    nicely-formatted feedback back to the user.
2260

2261
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2262
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2263
    @param hooks_results: the results of the multi-node hooks rpc call
2264
    @param feedback_fn: function used to send feedback back to the caller
2265
    @param lu_result: previous Exec result
2266
    @return: the new Exec result, based on the previous result
2267
        and hook results
2268

2269
    """
2270
    # We only really run POST phase hooks, and are only interested in
2271
    # their results
2272
    if phase == constants.HOOKS_PHASE_POST:
2273
      # Used to change hooks' output to proper indentation
2274
      indent_re = re.compile('^', re.M)
2275
      feedback_fn("* Hooks Results")
2276
      assert hooks_results, "invalid result from hooks"
2277

    
2278
      for node_name in hooks_results:
2279
        res = hooks_results[node_name]
2280
        msg = res.fail_msg
2281
        test = msg and not res.offline
2282
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2283
                      "Communication failure in hooks execution: %s", msg)
2284
        if res.offline or msg:
2285
          # No need to investigate payload if node is offline or gave an error.
2286
          # override manually lu_result here as _ErrorIf only
2287
          # overrides self.bad
2288
          lu_result = 1
2289
          continue
2290
        for script, hkr, output in res.payload:
2291
          test = hkr == constants.HKR_FAIL
2292
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2293
                        "Script %s failed, output:", script)
2294
          if test:
2295
            output = indent_re.sub('      ', output)
2296
            feedback_fn("%s" % output)
2297
            lu_result = 0
2298

    
2299
      return lu_result
2300

    
2301

    
2302
class LUVerifyDisks(NoHooksLU):
2303
  """Verifies the cluster disks status.
2304

2305
  """
2306
  REQ_BGL = False
2307

    
2308
  def ExpandNames(self):
2309
    self.needed_locks = {
2310
      locking.LEVEL_NODE: locking.ALL_SET,
2311
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2312
    }
2313
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2314

    
2315
  def Exec(self, feedback_fn):
2316
    """Verify integrity of cluster disks.
2317

2318
    @rtype: tuple of three items
2319
    @return: a tuple of (dict of node-to-node_error, list of instances
2320
        which need activate-disks, dict of instance: (node, volume) for
2321
        missing volumes)
2322

2323
    """
2324
    result = res_nodes, res_instances, res_missing = {}, [], {}
2325

    
2326
    vg_name = self.cfg.GetVGName()
2327
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2328
    instances = [self.cfg.GetInstanceInfo(name)
2329
                 for name in self.cfg.GetInstanceList()]
2330

    
2331
    nv_dict = {}
2332
    for inst in instances:
2333
      inst_lvs = {}
2334
      if (not inst.admin_up or
2335
          inst.disk_template not in constants.DTS_NET_MIRROR):
2336
        continue
2337
      inst.MapLVsByNode(inst_lvs)
2338
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2339
      for node, vol_list in inst_lvs.iteritems():
2340
        for vol in vol_list:
2341
          nv_dict[(node, vol)] = inst
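    # nv_dict now maps (node_name, lv_name) -> instance object for every LV
    # that should exist somewhere, e.g. a hypothetical key could be
    # ("node1.example.com", "xenvg/disk0_data"); entries still present after
    # querying the nodes below are reported as missing volumes.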
2342

    
2343
    if not nv_dict:
2344
      return result
2345

    
2346
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2347

    
2348
    for node in nodes:
2349
      # node_volume
2350
      node_res = node_lvs[node]
2351
      if node_res.offline:
2352
        continue
2353
      msg = node_res.fail_msg
2354
      if msg:
2355
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2356
        res_nodes[node] = msg
2357
        continue
2358

    
2359
      lvs = node_res.payload
2360
      for lv_name, (_, _, lv_online) in lvs.items():
2361
        inst = nv_dict.pop((node, lv_name), None)
2362
        if (not lv_online and inst is not None
2363
            and inst.name not in res_instances):
2364
          res_instances.append(inst.name)
2365

    
2366
    # any leftover items in nv_dict are missing LVs, let's arrange the
2367
    # data better
2368
    for key, inst in nv_dict.iteritems():
2369
      if inst.name not in res_missing:
2370
        res_missing[inst.name] = []
2371
      res_missing[inst.name].append(key)
2372

    
2373
    return result
2374

    
2375

    
2376
class LURepairDiskSizes(NoHooksLU):
2377
  """Verifies the cluster disks sizes.
2378

2379
  """
2380
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2381
  REQ_BGL = False
2382

    
2383
  def ExpandNames(self):
2384
    if self.op.instances:
2385
      self.wanted_names = []
2386
      for name in self.op.instances:
2387
        full_name = _ExpandInstanceName(self.cfg, name)
2388
        self.wanted_names.append(full_name)
2389
      self.needed_locks = {
2390
        locking.LEVEL_NODE: [],
2391
        locking.LEVEL_INSTANCE: self.wanted_names,
2392
        }
2393
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2394
    else:
2395
      self.wanted_names = None
2396
      self.needed_locks = {
2397
        locking.LEVEL_NODE: locking.ALL_SET,
2398
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2399
        }
2400
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2401

    
2402
  def DeclareLocks(self, level):
2403
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2404
      self._LockInstancesNodes(primary_only=True)
2405

    
2406
  def CheckPrereq(self):
2407
    """Check prerequisites.
2408

2409
    This only checks the optional instance list against the existing names.
2410

2411
    """
2412
    if self.wanted_names is None:
2413
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2414

    
2415
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2416
                             in self.wanted_names]
2417

    
2418
  def _EnsureChildSizes(self, disk):
2419
    """Ensure children of the disk have the needed disk size.
2420

2421
    This is valid mainly for DRBD8 and fixes an issue where the
2422
    children have a smaller disk size.
2423

2424
    @param disk: an L{ganeti.objects.Disk} object
2425

2426
    """
2427
    if disk.dev_type == constants.LD_DRBD8:
2428
      assert disk.children, "Empty children for DRBD8?"
2429
      fchild = disk.children[0]
2430
      mismatch = fchild.size < disk.size
2431
      if mismatch:
2432
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2433
                     fchild.size, disk.size)
2434
        fchild.size = disk.size
2435

    
2436
      # and we recurse on this child only, not on the metadev
2437
      return self._EnsureChildSizes(fchild) or mismatch
2438
    else:
2439
      return False
2440

    
2441
  def Exec(self, feedback_fn):
2442
    """Verify the size of cluster disks.
2443

2444
    """
2445
    # TODO: check child disks too
2446
    # TODO: check differences in size between primary/secondary nodes
2447
    per_node_disks = {}
2448
    for instance in self.wanted_instances:
2449
      pnode = instance.primary_node
2450
      if pnode not in per_node_disks:
2451
        per_node_disks[pnode] = []
2452
      for idx, disk in enumerate(instance.disks):
2453
        per_node_disks[pnode].append((instance, idx, disk))
2454

    
2455
    changed = []
2456
    for node, dskl in per_node_disks.items():
2457
      newl = [v[2].Copy() for v in dskl]
2458
      for dsk in newl:
2459
        self.cfg.SetDiskID(dsk, node)
2460
      result = self.rpc.call_blockdev_getsizes(node, newl)
2461
      if result.fail_msg:
2462
        self.LogWarning("Failure in blockdev_getsizes call to node"
2463
                        " %s, ignoring", node)
2464
        continue
2465
      if len(result.data) != len(dskl):
2466
        self.LogWarning("Invalid result from node %s, ignoring node results",
2467
                        node)
2468
        continue
2469
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2470
        if size is None:
2471
          self.LogWarning("Disk %d of instance %s did not return size"
2472
                          " information, ignoring", idx, instance.name)
2473
          continue
2474
        if not isinstance(size, (int, long)):
2475
          self.LogWarning("Disk %d of instance %s did not return valid"
2476
                          " size information, ignoring", idx, instance.name)
2477
          continue
2478
        size = size >> 20
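        # shifting right by 20 bits divides by 2**20, converting the
        # node-reported value (bytes) into MiB, the unit used for disk.size
        # in the configuration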
2479
        if size != disk.size:
2480
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2481
                       " correcting: recorded %d, actual %d", idx,
2482
                       instance.name, disk.size, size)
2483
          disk.size = size
2484
          self.cfg.Update(instance, feedback_fn)
2485
          changed.append((instance.name, idx, size))
2486
        if self._EnsureChildSizes(disk):
2487
          self.cfg.Update(instance, feedback_fn)
2488
          changed.append((instance.name, idx, disk.size))
2489
    return changed
2490

    
2491

    
2492
class LURenameCluster(LogicalUnit):
2493
  """Rename the cluster.
2494

2495
  """
2496
  HPATH = "cluster-rename"
2497
  HTYPE = constants.HTYPE_CLUSTER
2498
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2499

    
2500
  def BuildHooksEnv(self):
2501
    """Build hooks env.
2502

2503
    """
2504
    env = {
2505
      "OP_TARGET": self.cfg.GetClusterName(),
2506
      "NEW_NAME": self.op.name,
2507
      }
2508
    mn = self.cfg.GetMasterNode()
2509
    all_nodes = self.cfg.GetNodeList()
2510
    return env, [mn], all_nodes
2511

    
2512
  def CheckPrereq(self):
2513
    """Verify that the passed name is a valid one.
2514

2515
    """
2516
    hostname = utils.GetHostInfo(self.op.name)
2517

    
2518
    new_name = hostname.name
2519
    self.ip = new_ip = hostname.ip
2520
    old_name = self.cfg.GetClusterName()
2521
    old_ip = self.cfg.GetMasterIP()
2522
    if new_name == old_name and new_ip == old_ip:
2523
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2524
                                 " cluster has changed",
2525
                                 errors.ECODE_INVAL)
2526
    if new_ip != old_ip:
2527
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2528
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2529
                                   " reachable on the network. Aborting." %
2530
                                   new_ip, errors.ECODE_NOTUNIQUE)
2531

    
2532
    self.op.name = new_name
2533

    
2534
  def Exec(self, feedback_fn):
2535
    """Rename the cluster.
2536

2537
    """
2538
    clustername = self.op.name
2539
    ip = self.ip
2540

    
2541
    # shutdown the master IP
2542
    master = self.cfg.GetMasterNode()
2543
    result = self.rpc.call_node_stop_master(master, False)
2544
    result.Raise("Could not disable the master role")
2545

    
2546
    try:
2547
      cluster = self.cfg.GetClusterInfo()
2548
      cluster.cluster_name = clustername
2549
      cluster.master_ip = ip
2550
      self.cfg.Update(cluster, feedback_fn)
2551

    
2552
      # update the known hosts file
2553
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2554
      node_list = self.cfg.GetNodeList()
2555
      try:
2556
        node_list.remove(master)
2557
      except ValueError:
2558
        pass
2559
      result = self.rpc.call_upload_file(node_list,
2560
                                         constants.SSH_KNOWN_HOSTS_FILE)
2561
      for to_node, to_result in result.iteritems():
2562
        msg = to_result.fail_msg
2563
        if msg:
2564
          msg = ("Copy of file %s to node %s failed: %s" %
2565
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2566
          self.proc.LogWarning(msg)
2567

    
2568
    finally:
2569
      result = self.rpc.call_node_start_master(master, False, False)
2570
      msg = result.fail_msg
2571
      if msg:
2572
        self.LogWarning("Could not re-enable the master role on"
2573
                        " the master, please restart manually: %s", msg)
2574

    
2575

    
2576
class LUSetClusterParams(LogicalUnit):
2577
  """Change the parameters of the cluster.
2578

2579
  """
2580
  HPATH = "cluster-modify"
2581
  HTYPE = constants.HTYPE_CLUSTER
2582
  _OP_PARAMS = [
2583
    ("vg_name", None, _TMaybeString),
2584
    ("enabled_hypervisors", None,
2585
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2586
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2587
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2588
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2589
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2590
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2591
    ("uid_pool", None, _NoType),
2592
    ("add_uids", None, _NoType),
2593
    ("remove_uids", None, _NoType),
2594
    ("maintain_node_health", None, _TMaybeBool),
2595
    ("nicparams", None, _TOr(_TDict, _TNone)),
2596
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2597
    ("default_iallocator", None, _TMaybeString),
2598
    ]
2599
  REQ_BGL = False
2600

    
2601
  def CheckArguments(self):
2602
    """Check parameters
2603

2604
    """
2605
    if self.op.uid_pool:
2606
      uidpool.CheckUidPool(self.op.uid_pool)
2607

    
2608
    if self.op.add_uids:
2609
      uidpool.CheckUidPool(self.op.add_uids)
2610

    
2611
    if self.op.remove_uids:
2612
      uidpool.CheckUidPool(self.op.remove_uids)
2613

    
2614
  def ExpandNames(self):
2615
    # FIXME: in the future maybe other cluster params won't require checking on
2616
    # all nodes to be modified.
2617
    self.needed_locks = {
2618
      locking.LEVEL_NODE: locking.ALL_SET,
2619
    }
2620
    self.share_locks[locking.LEVEL_NODE] = 1
2621

    
2622
  def BuildHooksEnv(self):
2623
    """Build hooks env.
2624

2625
    """
2626
    env = {
2627
      "OP_TARGET": self.cfg.GetClusterName(),
2628
      "NEW_VG_NAME": self.op.vg_name,
2629
      }
2630
    mn = self.cfg.GetMasterNode()
2631
    return env, [mn], [mn]
2632

    
2633
  def CheckPrereq(self):
2634
    """Check prerequisites.
2635

2636
    This checks whether the given params don't conflict and
2637
    if the given volume group is valid.
2638

2639
    """
2640
    if self.op.vg_name is not None and not self.op.vg_name:
2641
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2642
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2643
                                   " instances exist", errors.ECODE_INVAL)
2644

    
2645
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2646
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2647
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2648
                                   " drbd-based instances exist",
2649
                                   errors.ECODE_INVAL)
2650

    
2651
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2652

    
2653
    # if vg_name not None, checks given volume group on all nodes
2654
    if self.op.vg_name:
2655
      vglist = self.rpc.call_vg_list(node_list)
2656
      for node in node_list:
2657
        msg = vglist[node].fail_msg
2658
        if msg:
2659
          # ignoring down node
2660
          self.LogWarning("Error while gathering data on node %s"
2661
                          " (ignoring node): %s", node, msg)
2662
          continue
2663
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2664
                                              self.op.vg_name,
2665
                                              constants.MIN_VG_SIZE)
2666
        if vgstatus:
2667
          raise errors.OpPrereqError("Error on node '%s': %s" %
2668
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2669

    
2670
    if self.op.drbd_helper:
2671
      # checks given drbd helper on all nodes
2672
      helpers = self.rpc.call_drbd_helper(node_list)
2673
      for node in node_list:
2674
        ninfo = self.cfg.GetNodeInfo(node)
2675
        if ninfo.offline:
2676
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2677
          continue
2678
        msg = helpers[node].fail_msg
2679
        if msg:
2680
          raise errors.OpPrereqError("Error checking drbd helper on node"
2681
                                     " '%s': %s" % (node, msg),
2682
                                     errors.ECODE_ENVIRON)
2683
        node_helper = helpers[node].payload
2684
        if node_helper != self.op.drbd_helper:
2685
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2686
                                     (node, node_helper), errors.ECODE_ENVIRON)
2687

    
2688
    self.cluster = cluster = self.cfg.GetClusterInfo()
2689
    # validate params changes
2690
    if self.op.beparams:
2691
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2692
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2693

    
2694
    if self.op.nicparams:
2695
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2696
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2697
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2698
      nic_errors = []
2699

    
2700
      # check all instances for consistency
2701
      for instance in self.cfg.GetAllInstancesInfo().values():
2702
        for nic_idx, nic in enumerate(instance.nics):
2703
          params_copy = copy.deepcopy(nic.nicparams)
2704
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
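          # FillDict-style merge: the NIC's own settings (params_copy)
          # override the proposed new cluster-level nicparams, mirroring how
          # the values would be resolved at runtime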
2705

    
2706
          # check parameter syntax
2707
          try:
2708
            objects.NIC.CheckParameterSyntax(params_filled)
2709
          except errors.ConfigurationError, err:
2710
            nic_errors.append("Instance %s, nic/%d: %s" %
2711
                              (instance.name, nic_idx, err))
2712

    
2713
          # if we're moving instances to routed, check that they have an ip
2714
          target_mode = params_filled[constants.NIC_MODE]
2715
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2716
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2717
                              (instance.name, nic_idx))
2718
      if nic_errors:
2719
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2720
                                   "\n".join(nic_errors))
2721

    
2722
    # hypervisor list/parameters
2723
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2724
    if self.op.hvparams:
2725
      for hv_name, hv_dict in self.op.hvparams.items():
2726
        if hv_name not in self.new_hvparams:
2727
          self.new_hvparams[hv_name] = hv_dict
2728
        else:
2729
          self.new_hvparams[hv_name].update(hv_dict)
2730

    
2731
    # os hypervisor parameters
2732
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2733
    if self.op.os_hvp:
2734
      for os_name, hvs in self.op.os_hvp.items():
2735
        if os_name not in self.new_os_hvp:
2736
          self.new_os_hvp[os_name] = hvs
2737
        else:
2738
          for hv_name, hv_dict in hvs.items():
2739
            if hv_name not in self.new_os_hvp[os_name]:
2740
              self.new_os_hvp[os_name][hv_name] = hv_dict
2741
            else:
2742
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2743

    
2744
    # os parameters
2745
    self.new_osp = objects.FillDict(cluster.osparams, {})
2746
    if self.op.osparams:
2747
      for os_name, osp in self.op.osparams.items():
2748
        if os_name not in self.new_osp:
2749
          self.new_osp[os_name] = {}
2750

    
2751
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2752
                                                  use_none=True)
2753

    
2754
        if not self.new_osp[os_name]:
2755
          # we removed all parameters
2756
          del self.new_osp[os_name]
2757
        else:
2758
          # check the parameter validity (remote check)
2759
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2760
                         os_name, self.new_osp[os_name])
2761

    
2762
    # changes to the hypervisor list
2763
    if self.op.enabled_hypervisors is not None:
2764
      self.hv_list = self.op.enabled_hypervisors
2765
      for hv in self.hv_list:
2766
        # if the hypervisor doesn't already exist in the cluster
2767
        # hvparams, we initialize it to empty, and then (in both
2768
        # cases) we make sure to fill the defaults, as we might not
2769
        # have a complete defaults list if the hypervisor wasn't
2770
        # enabled before
2771
        if hv not in new_hvp:
2772
          new_hvp[hv] = {}
2773
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2774
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2775
    else:
2776
      self.hv_list = cluster.enabled_hypervisors
2777

    
2778
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2779
      # either the enabled list has changed, or the parameters have, validate
2780
      for hv_name, hv_params in self.new_hvparams.items():
2781
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2782
            (self.op.enabled_hypervisors and
2783
             hv_name in self.op.enabled_hypervisors)):
2784
          # either this is a new hypervisor, or its parameters have changed
2785
          hv_class = hypervisor.GetHypervisor(hv_name)
2786
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2787
          hv_class.CheckParameterSyntax(hv_params)
2788
          _CheckHVParams(self, node_list, hv_name, hv_params)
2789

    
2790
    if self.op.os_hvp:
2791
      # no need to check any newly-enabled hypervisors, since the
2792
      # defaults have already been checked in the above code-block
2793
      for os_name, os_hvp in self.new_os_hvp.items():
2794
        for hv_name, hv_params in os_hvp.items():
2795
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2796
          # we need to fill in the new os_hvp on top of the actual hv_p
2797
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2798
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2799
          hv_class = hypervisor.GetHypervisor(hv_name)
2800
          hv_class.CheckParameterSyntax(new_osp)
2801
          _CheckHVParams(self, node_list, hv_name, new_osp)
2802

    
2803
    if self.op.default_iallocator:
2804
      alloc_script = utils.FindFile(self.op.default_iallocator,
2805
                                    constants.IALLOCATOR_SEARCH_PATH,
2806
                                    os.path.isfile)
2807
      if alloc_script is None:
2808
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2809
                                   " specified" % self.op.default_iallocator,
2810
                                   errors.ECODE_INVAL)
2811

    
2812
  def Exec(self, feedback_fn):
2813
    """Change the parameters of the cluster.
2814

2815
    """
2816
    if self.op.vg_name is not None:
2817
      new_volume = self.op.vg_name
2818
      if not new_volume:
2819
        new_volume = None
2820
      if new_volume != self.cfg.GetVGName():
2821
        self.cfg.SetVGName(new_volume)
2822
      else:
2823
        feedback_fn("Cluster LVM configuration already in desired"
2824
                    " state, not changing")
2825
    if self.op.drbd_helper is not None:
2826
      new_helper = self.op.drbd_helper
2827
      if not new_helper:
2828
        new_helper = None
2829
      if new_helper != self.cfg.GetDRBDHelper():
2830
        self.cfg.SetDRBDHelper(new_helper)
2831
      else:
2832
        feedback_fn("Cluster DRBD helper already in desired state,"
2833
                    " not changing")
2834
    if self.op.hvparams:
2835
      self.cluster.hvparams = self.new_hvparams
2836
    if self.op.os_hvp:
2837
      self.cluster.os_hvp = self.new_os_hvp
2838
    if self.op.enabled_hypervisors is not None:
2839
      self.cluster.hvparams = self.new_hvparams
2840
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2841
    if self.op.beparams:
2842
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2843
    if self.op.nicparams:
2844
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2845
    if self.op.osparams:
2846
      self.cluster.osparams = self.new_osp
2847

    
2848
    if self.op.candidate_pool_size is not None:
2849
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2850
      # we need to update the pool size here, otherwise the save will fail
2851
      _AdjustCandidatePool(self, [])
2852

    
2853
    if self.op.maintain_node_health is not None:
2854
      self.cluster.maintain_node_health = self.op.maintain_node_health
2855

    
2856
    if self.op.add_uids is not None:
2857
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2858

    
2859
    if self.op.remove_uids is not None:
2860
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2861

    
2862
    if self.op.uid_pool is not None:
2863
      self.cluster.uid_pool = self.op.uid_pool
2864

    
2865
    if self.op.default_iallocator is not None:
2866
      self.cluster.default_iallocator = self.op.default_iallocator
2867

    
2868
    self.cfg.Update(self.cluster, feedback_fn)
2869

    
2870

    
2871
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2872
  """Distribute additional files which are part of the cluster configuration.
2873

2874
  ConfigWriter takes care of distributing the config and ssconf files, but
2875
  there are more files which should be distributed to all nodes. This function
2876
  makes sure those are copied.
2877

2878
  @param lu: calling logical unit
2879
  @param additional_nodes: list of nodes not in the config to distribute to
2880

2881
  """
2882
  # 1. Gather target nodes
2883
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2884
  dist_nodes = lu.cfg.GetOnlineNodeList()
2885
  if additional_nodes is not None:
2886
    dist_nodes.extend(additional_nodes)
2887
  if myself.name in dist_nodes:
2888
    dist_nodes.remove(myself.name)
2889

    
2890
  # 2. Gather files to distribute
2891
  dist_files = set([constants.ETC_HOSTS,
2892
                    constants.SSH_KNOWN_HOSTS_FILE,
2893
                    constants.RAPI_CERT_FILE,
2894
                    constants.RAPI_USERS_FILE,
2895
                    constants.CONFD_HMAC_KEY,
2896
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2897
                   ])
2898

    
2899
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2900
  for hv_name in enabled_hypervisors:
2901
    hv_class = hypervisor.GetHypervisor(hv_name)
2902
    dist_files.update(hv_class.GetAncillaryFiles())
2903

    
2904
  # 3. Perform the files upload
2905
  for fname in dist_files:
2906
    if os.path.exists(fname):
2907
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2908
      for to_node, to_result in result.items():
2909
        msg = to_result.fail_msg
2910
        if msg:
2911
          msg = ("Copy of file %s to node %s failed: %s" %
2912
                 (fname, to_node, msg))
2913
          lu.proc.LogWarning(msg)
2914

    
2915

    
2916
class LURedistributeConfig(NoHooksLU):
2917
  """Force the redistribution of cluster configuration.
2918

2919
  This is a very simple LU.
2920

2921
  """
2922
  REQ_BGL = False
2923

    
2924
  def ExpandNames(self):
2925
    self.needed_locks = {
2926
      locking.LEVEL_NODE: locking.ALL_SET,
2927
    }
2928
    self.share_locks[locking.LEVEL_NODE] = 1
2929

    
2930
  def Exec(self, feedback_fn):
2931
    """Redistribute the configuration.
2932

2933
    """
2934
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2935
    _RedistributeAncillaryFiles(self)
2936

    
2937

    
2938
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2939
  """Sleep and poll for an instance's disk to sync.
2940

2941
  """
2942
  if not instance.disks or disks is not None and not disks:
2943
    return True
2944

    
2945
  disks = _ExpandCheckDisks(instance, disks)
2946

    
2947
  if not oneshot:
2948
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2949

    
2950
  node = instance.primary_node
2951

    
2952
  for dev in disks:
2953
    lu.cfg.SetDiskID(dev, node)
2954

    
2955
  # TODO: Convert to utils.Retry
2956

    
2957
  retries = 0
2958
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2959
  while True:
2960
    max_time = 0
2961
    done = True
2962
    cumul_degraded = False
2963
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2964
    msg = rstats.fail_msg
2965
    if msg:
2966
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2967
      retries += 1
2968
      if retries >= 10:
2969
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2970
                                 " aborting." % node)
2971
      time.sleep(6)
2972
      continue
2973
    rstats = rstats.payload
2974
    retries = 0
2975
    for i, mstat in enumerate(rstats):
2976
      if mstat is None:
2977
        lu.LogWarning("Can't compute data for node %s/%s",
2978
                           node, disks[i].iv_name)
2979
        continue
2980

    
2981
      cumul_degraded = (cumul_degraded or
2982
                        (mstat.is_degraded and mstat.sync_percent is None))
2983
      if mstat.sync_percent is not None:
2984
        done = False
2985
        if mstat.estimated_time is not None:
2986
          rem_time = ("%s remaining (estimated)" %
2987
                      utils.FormatSeconds(mstat.estimated_time))
2988
          max_time = mstat.estimated_time
2989
        else:
2990
          rem_time = "no time estimate"
2991
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2992
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2993

    
2994
    # if we're done but degraded, let's do a few small retries, to
2995
    # make sure we see a stable and not transient situation; therefore
2996
    # we force restart of the loop
2997
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2998
      logging.info("Degraded disks found, %d retries left", degr_retries)
2999
      degr_retries -= 1
3000
      time.sleep(1)
3001
      continue
3002

    
3003
    if done or oneshot:
3004
      break
3005

    
3006
    time.sleep(min(60, max_time))
3007

    
3008
  if done:
3009
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3010
  return not cumul_degraded
3011

    
3012

    
3013
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3014
  """Check that mirrors are not degraded.
3015

3016
  The ldisk parameter, if True, will change the test from the
3017
  is_degraded attribute (which represents overall non-ok status for
3018
  the device(s)) to the ldisk (representing the local storage status).
3019

3020
  """
3021
  lu.cfg.SetDiskID(dev, node)
3022

    
3023
  result = True
3024

    
3025
  if on_primary or dev.AssembleOnSecondary():
3026
    rstats = lu.rpc.call_blockdev_find(node, dev)
3027
    msg = rstats.fail_msg
3028
    if msg:
3029
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3030
      result = False
3031
    elif not rstats.payload:
3032
      lu.LogWarning("Can't find disk on node %s", node)
3033
      result = False
3034
    else:
3035
      if ldisk:
3036
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3037
      else:
3038
        result = result and not rstats.payload.is_degraded
3039

    
3040
  if dev.children:
3041
    for child in dev.children:
3042
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3043

    
3044
  return result
3045

    
3046

    
3047
class LUDiagnoseOS(NoHooksLU):
3048
  """Logical unit for OS diagnose/query.
3049

3050
  """
3051
  _OP_PARAMS = [
3052
    _POutputFields,
3053
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3054
    ]
3055
  REQ_BGL = False
3056
  _FIELDS_STATIC = utils.FieldSet()
3057
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3058
                                   "parameters", "api_versions")
3059

    
3060
  def CheckArguments(self):
3061
    if self.op.names:
3062
      raise errors.OpPrereqError("Selective OS query not supported",
3063
                                 errors.ECODE_INVAL)
3064

    
3065
    _CheckOutputFields(static=self._FIELDS_STATIC,
3066
                       dynamic=self._FIELDS_DYNAMIC,
3067
                       selected=self.op.output_fields)
3068

    
3069
  def ExpandNames(self):
3070
    # Lock all nodes, in shared mode
3071
    # Temporary removal of locks, should be reverted later
3072
    # TODO: reintroduce locks when they are lighter-weight
3073
    self.needed_locks = {}
3074
    #self.share_locks[locking.LEVEL_NODE] = 1
3075
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3076

    
3077
  @staticmethod
3078
  def _DiagnoseByOS(rlist):
3079
    """Remaps a per-node return list into an a per-os per-node dictionary
3080

3081
    @param rlist: a map with node names as keys and OS objects as values
3082

3083
    @rtype: dict
3084
    @return: a dictionary with osnames as keys and as value another
3085
        map, with nodes as keys and tuples of (path, status, diagnose,
3086
        variants, parameters, api_versions) as values, eg::
3087

3088
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3089
                                     (/srv/..., False, "invalid api")],
3090
                           "node2": [(/srv/..., True, "", [], [])]}
3091
          }
3092

3093
    """
3094
    all_os = {}
3095
    # we build here the list of nodes that didn't fail the RPC (at RPC
3096
    # level), so that nodes with a non-responding node daemon don't
3097
    # make all OSes invalid
3098
    good_nodes = [node_name for node_name in rlist
3099
                  if not rlist[node_name].fail_msg]
3100
    for node_name, nr in rlist.items():
3101
      if nr.fail_msg or not nr.payload:
3102
        continue
3103
      for (name, path, status, diagnose, variants,
3104
           params, api_versions) in nr.payload:
3105
        if name not in all_os:
3106
          # build a list of nodes for this os containing empty lists
3107
          # for each node in node_list
3108
          all_os[name] = {}
3109
          for nname in good_nodes:
3110
            all_os[name][nname] = []
3111
        # convert params from [name, help] to (name, help)
3112
        params = [tuple(v) for v in params]
3113
        all_os[name][node_name].append((path, status, diagnose,
3114
                                        variants, params, api_versions))
3115
    return all_os
3116

    
3117
  def Exec(self, feedback_fn):
3118
    """Compute the list of OSes.
3119

3120
    """
3121
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3122
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3123
    pol = self._DiagnoseByOS(node_data)
3124
    output = []
3125

    
3126
    for os_name, os_data in pol.items():
3127
      row = []
3128
      valid = True
3129
      (variants, params, api_versions) = null_state = (set(), set(), set())
3130
      for idx, osl in enumerate(os_data.values()):
3131
        valid = bool(valid and osl and osl[0][1])
3132
        if not valid:
3133
          (variants, params, api_versions) = null_state
3134
          break
3135
        node_variants, node_params, node_api = osl[0][3:6]
3136
        if idx == 0: # first entry
3137
          variants = set(node_variants)
3138
          params = set(node_params)
3139
          api_versions = set(node_api)
3140
        else: # keep consistency
3141
          variants.intersection_update(node_variants)
3142
          params.intersection_update(node_params)
3143
          api_versions.intersection_update(node_api)
3144

    
3145
      for field in self.op.output_fields:
3146
        if field == "name":
3147
          val = os_name
3148
        elif field == "valid":
3149
          val = valid
3150
        elif field == "node_status":
3151
          # this is just a copy of the dict
3152
          val = {}
3153
          for node_name, nos_list in os_data.items():
3154
            val[node_name] = nos_list
3155
        elif field == "variants":
3156
          val = list(variants)
3157
        elif field == "parameters":
3158
          val = list(params)
3159
        elif field == "api_versions":
3160
          val = list(api_versions)
3161
        else:
3162
          raise errors.ParameterError(field)
3163
        row.append(val)
3164
      output.append(row)
3165

    
3166
    return output
3167

    
3168

    
3169
class LURemoveNode(LogicalUnit):
3170
  """Logical unit for removing a node.
3171

3172
  """
3173
  HPATH = "node-remove"
3174
  HTYPE = constants.HTYPE_NODE
3175
  _OP_PARAMS = [
3176
    _PNodeName,
3177
    ]
3178

    
3179
  def BuildHooksEnv(self):
3180
    """Build hooks env.
3181

3182
    This doesn't run on the target node in the pre phase as a failed
3183
    node would then be impossible to remove.
3184

3185
    """
3186
    env = {
3187
      "OP_TARGET": self.op.node_name,
3188
      "NODE_NAME": self.op.node_name,
3189
      }
3190
    all_nodes = self.cfg.GetNodeList()
3191
    try:
3192
      all_nodes.remove(self.op.node_name)
3193
    except ValueError:
3194
      logging.warning("Node %s which is about to be removed not found"
3195
                      " in the all nodes list", self.op.node_name)
3196
    return env, all_nodes, all_nodes
3197

    
3198
  def CheckPrereq(self):
3199
    """Check prerequisites.
3200

3201
    This checks:
3202
     - the node exists in the configuration
3203
     - it does not have primary or secondary instances
3204
     - it's not the master
3205

3206
    Any errors are signaled by raising errors.OpPrereqError.
3207

3208
    """
3209
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3210
    node = self.cfg.GetNodeInfo(self.op.node_name)
3211
    assert node is not None
3212

    
3213
    instance_list = self.cfg.GetInstanceList()
3214

    
3215
    masternode = self.cfg.GetMasterNode()
3216
    if node.name == masternode:
3217
      raise errors.OpPrereqError("Node is the master node,"
3218
                                 " you need to failover first.",
3219
                                 errors.ECODE_INVAL)
3220

    
3221
    for instance_name in instance_list:
3222
      instance = self.cfg.GetInstanceInfo(instance_name)
3223
      if node.name in instance.all_nodes:
3224
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3225
                                   " please remove first." % instance_name,
3226
                                   errors.ECODE_INVAL)
3227
    self.op.node_name = node.name
3228
    self.node = node
3229

    
3230
  def Exec(self, feedback_fn):
3231
    """Removes the node from the cluster.
3232

3233
    """
3234
    node = self.node
3235
    logging.info("Stopping the node daemon and removing configs from node %s",
3236
                 node.name)
3237

    
3238
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3239

    
3240
    # Promote nodes to master candidate as needed
3241
    _AdjustCandidatePool(self, exceptions=[node.name])
3242
    self.context.RemoveNode(node.name)
3243

    
3244
    # Run post hooks on the node before it's removed
3245
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3246
    try:
3247
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3248
    except:
3249
      # pylint: disable-msg=W0702
3250
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3251

    
3252
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3253
    msg = result.fail_msg
3254
    if msg:
3255
      self.LogWarning("Errors encountered on the remote node while leaving"
3256
                      " the cluster: %s", msg)
3257

    
3258
    # Remove node from our /etc/hosts
3259
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3260
      # FIXME: this should be done via an rpc call to node daemon
3261
      utils.RemoveHostFromEtcHosts(node.name)
3262
      _RedistributeAncillaryFiles(self)
3263

    
3264

    
3265
class LUQueryNodes(NoHooksLU):
3266
  """Logical unit for querying nodes.
3267

3268
  """
3269
  # pylint: disable-msg=W0142
3270
  _OP_PARAMS = [
3271
    _POutputFields,
3272
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3273
    ("use_locking", False, _TBool),
3274
    ]
3275
  REQ_BGL = False
3276

    
3277
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3278
                    "master_candidate", "offline", "drained"]
3279

    
3280
  _FIELDS_DYNAMIC = utils.FieldSet(
3281
    "dtotal", "dfree",
3282
    "mtotal", "mnode", "mfree",
3283
    "bootid",
3284
    "ctotal", "cnodes", "csockets",
3285
    )
3286

    
3287
  _FIELDS_STATIC = utils.FieldSet(*[
3288
    "pinst_cnt", "sinst_cnt",
3289
    "pinst_list", "sinst_list",
3290
    "pip", "sip", "tags",
3291
    "master",
3292
    "role"] + _SIMPLE_FIELDS
3293
    )
3294

    
3295
  def CheckArguments(self):
3296
    _CheckOutputFields(static=self._FIELDS_STATIC,
3297
                       dynamic=self._FIELDS_DYNAMIC,
3298
                       selected=self.op.output_fields)
3299

    
3300
  def ExpandNames(self):
3301
    self.needed_locks = {}
3302
    self.share_locks[locking.LEVEL_NODE] = 1
3303

    
3304
    if self.op.names:
3305
      self.wanted = _GetWantedNodes(self, self.op.names)
3306
    else:
3307
      self.wanted = locking.ALL_SET
3308

    
3309
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3310
    self.do_locking = self.do_node_query and self.op.use_locking
3311
    if self.do_locking:
3312
      # if we don't request only static fields, we need to lock the nodes
3313
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3314

    
3315
  def Exec(self, feedback_fn):
3316
    """Computes the list of nodes and their attributes.
3317

3318
    """
3319
    all_info = self.cfg.GetAllNodesInfo()
3320
    if self.do_locking:
3321
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3322
    elif self.wanted != locking.ALL_SET:
3323
      nodenames = self.wanted
3324
      missing = set(nodenames).difference(all_info.keys())
3325
      if missing:
3326
        raise errors.OpExecError(
3327
          "Some nodes were removed before retrieving their data: %s" % missing)
3328
    else:
3329
      nodenames = all_info.keys()
3330

    
3331
    nodenames = utils.NiceSort(nodenames)
3332
    nodelist = [all_info[name] for name in nodenames]
3333

    
3334
    # begin data gathering
3335

    
3336
    if self.do_node_query:
3337
      live_data = {}
3338
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3339
                                          self.cfg.GetHypervisorType())
3340
      for name in nodenames:
3341
        nodeinfo = node_data[name]
3342
        if not nodeinfo.fail_msg and nodeinfo.payload:
3343
          nodeinfo = nodeinfo.payload
3344
          fn = utils.TryConvert
3345
          live_data[name] = {
3346
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3347
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3348
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3349
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3350
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3351
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3352
            "bootid": nodeinfo.get('bootid', None),
3353
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3354
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3355
            }
3356
        else:
3357
          live_data[name] = {}
3358
    else:
3359
      live_data = dict.fromkeys(nodenames, {})
3360

    
3361
    node_to_primary = dict([(name, set()) for name in nodenames])
3362
    node_to_secondary = dict([(name, set()) for name in nodenames])
3363

    
3364
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3365
                             "sinst_cnt", "sinst_list"))
3366
    if inst_fields & frozenset(self.op.output_fields):
3367
      inst_data = self.cfg.GetAllInstancesInfo()
3368

    
3369
      for inst in inst_data.values():
3370
        if inst.primary_node in node_to_primary:
3371
          node_to_primary[inst.primary_node].add(inst.name)
3372
        for secnode in inst.secondary_nodes:
3373
          if secnode in node_to_secondary:
3374
            node_to_secondary[secnode].add(inst.name)
3375

    
3376
    master_node = self.cfg.GetMasterNode()
3377

    
3378
    # end data gathering
3379

    
3380
    output = []
3381
    for node in nodelist:
3382
      node_output = []
3383
      for field in self.op.output_fields:
3384
        if field in self._SIMPLE_FIELDS:
3385
          val = getattr(node, field)
3386
        elif field == "pinst_list":
3387
          val = list(node_to_primary[node.name])
3388
        elif field == "sinst_list":
3389
          val = list(node_to_secondary[node.name])
3390
        elif field == "pinst_cnt":
3391
          val = len(node_to_primary[node.name])
3392
        elif field == "sinst_cnt":
3393
          val = len(node_to_secondary[node.name])
3394
        elif field == "pip":
3395
          val = node.primary_ip
3396
        elif field == "sip":
3397
          val = node.secondary_ip
3398
        elif field == "tags":
3399
          val = list(node.GetTags())
3400
        elif field == "master":
3401
          val = node.name == master_node
3402
        elif self._FIELDS_DYNAMIC.Matches(field):
3403
          val = live_data[node.name].get(field, None)
3404
        elif field == "role":
3405
          if node.name == master_node:
3406
            val = "M"
3407
          elif node.master_candidate:
3408
            val = "C"
3409
          elif node.drained:
3410
            val = "D"
3411
          elif node.offline:
3412
            val = "O"
3413
          else:
3414
            val = "R"
3415
        else:
3416
          raise errors.ParameterError(field)
3417
        node_output.append(val)
3418
      output.append(node_output)
3419

    
3420
    return output
3421

    
3422

    
3423
class LUQueryNodeVolumes(NoHooksLU):
3424
  """Logical unit for getting volumes on node(s).
3425

3426
  """
3427
  _OP_PARAMS = [
3428
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3429
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3430
    ]
3431
  REQ_BGL = False
3432
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3433
  _FIELDS_STATIC = utils.FieldSet("node")
3434

    
3435
  def CheckArguments(self):
3436
    _CheckOutputFields(static=self._FIELDS_STATIC,
3437
                       dynamic=self._FIELDS_DYNAMIC,
3438
                       selected=self.op.output_fields)
3439

    
3440
  def ExpandNames(self):
3441
    self.needed_locks = {}
3442
    self.share_locks[locking.LEVEL_NODE] = 1
3443
    if not self.op.nodes:
3444
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3445
    else:
3446
      self.needed_locks[locking.LEVEL_NODE] = \
3447
        _GetWantedNodes(self, self.op.nodes)
3448

    
3449
  def Exec(self, feedback_fn):
3450
    """Computes the list of nodes and their attributes.
3451

3452
    """
3453
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3454
    volumes = self.rpc.call_node_volumes(nodenames)
3455

    
3456
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3457
             in self.cfg.GetInstanceList()]
3458

    
3459
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3460

    
3461
    output = []
3462
    for node in nodenames:
3463
      nresult = volumes[node]
3464
      if nresult.offline:
3465
        continue
3466
      msg = nresult.fail_msg
3467
      if msg:
3468
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3469
        continue
3470

    
3471
      node_vols = nresult.payload[:]
3472
      node_vols.sort(key=lambda vol: vol['dev'])
3473

    
3474
      for vol in node_vols:
3475
        node_output = []
3476
        for field in self.op.output_fields:
3477
          if field == "node":
3478
            val = node
3479
          elif field == "phys":
3480
            val = vol['dev']
3481
          elif field == "vg":
3482
            val = vol['vg']
3483
          elif field == "name":
3484
            val = vol['name']
3485
          elif field == "size":
3486
            val = int(float(vol['size']))
3487
          elif field == "instance":
3488
            for inst in ilist:
3489
              if node not in lv_by_node[inst]:
3490
                continue
3491
              if vol['name'] in lv_by_node[inst][node]:
3492
                val = inst.name
3493
                break
3494
            else:
3495
              val = '-'
3496
          else:
3497
            raise errors.ParameterError(field)
3498
          node_output.append(str(val))
3499

    
3500
        output.append(node_output)
3501

    
3502
    return output
3503

    
3504

    
3505
class LUQueryNodeStorage(NoHooksLU):
3506
  """Logical unit for getting information on storage units on node(s).
3507

3508
  """
3509
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3510
  _OP_PARAMS = [
3511
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3512
    ("storage_type", _NoDefault, _CheckStorageType),
3513
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3514
    ("name", None, _TMaybeString),
3515
    ]
3516
  REQ_BGL = False
3517

    
3518
  def CheckArguments(self):
3519
    _CheckOutputFields(static=self._FIELDS_STATIC,
3520
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3521
                       selected=self.op.output_fields)
3522

    
3523
  def ExpandNames(self):
3524
    self.needed_locks = {}
3525
    self.share_locks[locking.LEVEL_NODE] = 1
3526

    
3527
    if self.op.nodes:
3528
      self.needed_locks[locking.LEVEL_NODE] = \
3529
        _GetWantedNodes(self, self.op.nodes)
3530
    else:
3531
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3532

    
3533
  def Exec(self, feedback_fn):
3534
    """Computes the list of nodes and their attributes.
3535

3536
    """
3537
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3538

    
3539
    # Always get name to sort by
3540
    if constants.SF_NAME in self.op.output_fields:
3541
      fields = self.op.output_fields[:]
3542
    else:
3543
      fields = [constants.SF_NAME] + self.op.output_fields
3544

    
3545
    # Never ask for node or type as it's only known to the LU
3546
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3547
      while extra in fields:
3548
        fields.remove(extra)
3549

    
3550
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3551
    name_idx = field_idx[constants.SF_NAME]
3552

    
3553
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3554
    data = self.rpc.call_storage_list(self.nodes,
3555
                                      self.op.storage_type, st_args,
3556
                                      self.op.name, fields)
3557

    
3558
    result = []
3559

    
3560
    for node in utils.NiceSort(self.nodes):
3561
      nresult = data[node]
3562
      if nresult.offline:
3563
        continue
3564

    
3565
      msg = nresult.fail_msg
3566
      if msg:
3567
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3568
        continue
3569

    
3570
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3571

    
3572
      for name in utils.NiceSort(rows.keys()):
3573
        row = rows[name]
3574

    
3575
        out = []
3576

    
3577
        for field in self.op.output_fields:
3578
          if field == constants.SF_NODE:
3579
            val = node
3580
          elif field == constants.SF_TYPE:
3581
            val = self.op.storage_type
3582
          elif field in field_idx:
3583
            val = row[field_idx[field]]
3584
          else:
3585
            raise errors.ParameterError(field)
3586

    
3587
          out.append(val)
3588

    
3589
        result.append(out)
3590

    
3591
    return result
3592

    
3593

    
3594
class LUModifyNodeStorage(NoHooksLU):
3595
  """Logical unit for modifying a storage volume on a node.
3596

3597
  """
3598
  _OP_PARAMS = [
3599
    _PNodeName,
3600
    ("storage_type", _NoDefault, _CheckStorageType),
3601
    ("name", _NoDefault, _TNonEmptyString),
3602
    ("changes", _NoDefault, _TDict),
3603
    ]
3604
  REQ_BGL = False
3605

    
3606
  def CheckArguments(self):
3607
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3608

    
3609
    storage_type = self.op.storage_type
3610

    
3611
    try:
3612
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3613
    except KeyError:
3614
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3615
                                 " modified" % storage_type,
3616
                                 errors.ECODE_INVAL)
3617

    
3618
    diff = set(self.op.changes.keys()) - modifiable
3619
    if diff:
3620
      raise errors.OpPrereqError("The following fields can not be modified for"
3621
                                 " storage units of type '%s': %r" %
3622
                                 (storage_type, list(diff)),
3623
                                 errors.ECODE_INVAL)
3624

    
3625
  def ExpandNames(self):
3626
    self.needed_locks = {
3627
      locking.LEVEL_NODE: self.op.node_name,
3628
      }
3629

    
3630
  def Exec(self, feedback_fn):
3631
    """Computes the list of nodes and their attributes.
3632

3633
    """
3634
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3635
    result = self.rpc.call_storage_modify(self.op.node_name,
3636
                                          self.op.storage_type, st_args,
3637
                                          self.op.name, self.op.changes)
3638
    result.Raise("Failed to modify storage unit '%s' on %s" %
3639
                 (self.op.name, self.op.node_name))
3640

    
3641

    
3642
class LUAddNode(LogicalUnit):
3643
  """Logical unit for adding node to the cluster.
3644

3645
  """
3646
  HPATH = "node-add"
3647
  HTYPE = constants.HTYPE_NODE
3648
  _OP_PARAMS = [
3649
    _PNodeName,
3650
    ("primary_ip", None, _NoType),
3651
    ("secondary_ip", None, _TMaybeString),
3652
    ("readd", False, _TBool),
3653
    ]
3654

    
3655
  def CheckArguments(self):
3656
    # validate/normalize the node name
3657
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3658

    
3659
  def BuildHooksEnv(self):
3660
    """Build hooks env.
3661

3662
    This will run on all nodes before, and on all nodes + the new node after.
3663

3664
    """
3665
    env = {
3666
      "OP_TARGET": self.op.node_name,
3667
      "NODE_NAME": self.op.node_name,
3668
      "NODE_PIP": self.op.primary_ip,
3669
      "NODE_SIP": self.op.secondary_ip,
3670
      }
3671
    nodes_0 = self.cfg.GetNodeList()
3672
    nodes_1 = nodes_0 + [self.op.node_name, ]
3673
    return env, nodes_0, nodes_1
3674

    
3675
  def CheckPrereq(self):
3676
    """Check prerequisites.
3677

3678
    This checks:
3679
     - the new node is not already in the config
3680
     - it is resolvable
3681
     - its parameters (single/dual homed) matches the cluster
3682

3683
    Any errors are signaled by raising errors.OpPrereqError.
3684

3685
    """
3686
    node_name = self.op.node_name
3687
    cfg = self.cfg
3688

    
3689
    dns_data = utils.GetHostInfo(node_name)
3690

    
3691
    node = dns_data.name
3692
    primary_ip = self.op.primary_ip = dns_data.ip
3693
    if self.op.secondary_ip is None:
3694
      self.op.secondary_ip = primary_ip
3695
    if not utils.IsValidIP4(self.op.secondary_ip):
3696
      raise errors.OpPrereqError("Invalid secondary IP given",
3697
                                 errors.ECODE_INVAL)
3698
    secondary_ip = self.op.secondary_ip
3699

    
3700
    node_list = cfg.GetNodeList()
3701
    if not self.op.readd and node in node_list:
3702
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3703
                                 node, errors.ECODE_EXISTS)
3704
    elif self.op.readd and node not in node_list:
3705
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3706
                                 errors.ECODE_NOENT)
3707

    
3708
    self.changed_primary_ip = False
3709

    
3710
    for existing_node_name in node_list:
3711
      existing_node = cfg.GetNodeInfo(existing_node_name)
3712

    
3713
      if self.op.readd and node == existing_node_name:
3714
        if existing_node.secondary_ip != secondary_ip:
3715
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3716
                                     " address configuration as before",
3717
                                     errors.ECODE_INVAL)
3718
        if existing_node.primary_ip != primary_ip:
3719
          self.changed_primary_ip = True
3720

    
3721
        continue
3722

    
3723
      if (existing_node.primary_ip == primary_ip or
3724
          existing_node.secondary_ip == primary_ip or
3725
          existing_node.primary_ip == secondary_ip or
3726
          existing_node.secondary_ip == secondary_ip):
3727
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3728
                                   " existing node %s" % existing_node.name,
3729
                                   errors.ECODE_NOTUNIQUE)
3730

    
3731
    # check that the type of the node (single versus dual homed) is the
3732
    # same as for the master
3733
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3734
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3735
    newbie_singlehomed = secondary_ip == primary_ip
3736
    if master_singlehomed != newbie_singlehomed:
3737
      if master_singlehomed:
3738
        raise errors.OpPrereqError("The master has no private ip but the"
3739
                                   " new node has one",
3740
                                   errors.ECODE_INVAL)
3741
      else:
3742
        raise errors.OpPrereqError("The master has a private ip but the"
3743
                                   " new node doesn't have one",
3744
                                   errors.ECODE_INVAL)
3745

    
3746
    # checks reachability
3747
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3748
      raise errors.OpPrereqError("Node not reachable by ping",
3749
                                 errors.ECODE_ENVIRON)
3750

    
3751
    if not newbie_singlehomed:
3752
      # check reachability from my secondary ip to newbie's secondary ip
3753
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3754
                           source=myself.secondary_ip):
3755
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3756
                                   " based ping to noded port",
3757
                                   errors.ECODE_ENVIRON)
3758

    
3759
    if self.op.readd:
3760
      exceptions = [node]
3761
    else:
3762
      exceptions = []
3763

    
3764
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3765

    
3766
    if self.op.readd:
3767
      self.new_node = self.cfg.GetNodeInfo(node)
3768
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3769
    else:
3770
      self.new_node = objects.Node(name=node,
3771
                                   primary_ip=primary_ip,
3772
                                   secondary_ip=secondary_ip,
3773
                                   master_candidate=self.master_candidate,
3774
                                   offline=False, drained=False)
3775

    
3776
  def Exec(self, feedback_fn):
3777
    """Adds the new node to the cluster.
3778

3779
    """
3780
    new_node = self.new_node
3781
    node = new_node.name
3782

    
3783
    # for re-adds, reset the offline/drained/master-candidate flags;
3784
    # we need to reset here, otherwise offline would prevent RPC calls
3785
    # later in the procedure; this also means that if the re-add
3786
    # fails, we are left with a non-offlined, broken node
3787
    if self.op.readd:
3788
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3789
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3790
      # if we demote the node, we do cleanup later in the procedure
3791
      new_node.master_candidate = self.master_candidate
3792
      if self.changed_primary_ip:
3793
        new_node.primary_ip = self.op.primary_ip
3794

    
3795
    # notify the user about any possible mc promotion
3796
    if new_node.master_candidate:
3797
      self.LogInfo("Node will be a master candidate")
3798

    
3799
    # check connectivity
3800
    result = self.rpc.call_version([node])[node]
3801
    result.Raise("Can't get version information from node %s" % node)
3802
    if constants.PROTOCOL_VERSION == result.payload:
3803
      logging.info("Communication to node %s fine, sw version %s match",
3804
                   node, result.payload)
3805
    else:
3806
      raise errors.OpExecError("Version mismatch master version %s,"
3807
                               " node version %s" %
3808
                               (constants.PROTOCOL_VERSION, result.payload))
3809

    
3810
    # setup ssh on node
3811
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3812
      logging.info("Copy ssh key to node %s", node)
3813
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3814
      keyarray = []
3815
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3816
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3817
                  priv_key, pub_key]
3818

    
3819
      for i in keyfiles:
3820
        keyarray.append(utils.ReadFile(i))
3821

    
3822
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3823
                                      keyarray[2], keyarray[3], keyarray[4],
3824
                                      keyarray[5])
3825
      result.Raise("Cannot transfer ssh keys to the new node")
3826

    
3827
    # Add node to our /etc/hosts, and add key to known_hosts
3828
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3829
      # FIXME: this should be done via an rpc call to node daemon
3830
      utils.AddHostToEtcHosts(new_node.name)
3831

    
3832
    if new_node.secondary_ip != new_node.primary_ip:
3833
      result = self.rpc.call_node_has_ip_address(new_node.name,
3834
                                                 new_node.secondary_ip)
3835
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3836
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3837
      if not result.payload:
3838
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3839
                                 " you gave (%s). Please fix and re-run this"
3840
                                 " command." % new_node.secondary_ip)
3841

    
3842
    node_verify_list = [self.cfg.GetMasterNode()]
3843
    node_verify_param = {
3844
      constants.NV_NODELIST: [node],
3845
      # TODO: do a node-net-test as well?
3846
    }
3847

    
3848
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3849
                                       self.cfg.GetClusterName())
3850
    for verifier in node_verify_list:
3851
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3852
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3853
      if nl_payload:
3854
        for failed in nl_payload:
3855
          feedback_fn("ssh/hostname verification failed"
3856
                      " (checking from %s): %s" %
3857
                      (verifier, nl_payload[failed]))
3858
        raise errors.OpExecError("ssh/hostname verification failed.")
3859

    
3860
    if self.op.readd:
3861
      _RedistributeAncillaryFiles(self)
3862
      self.context.ReaddNode(new_node)
3863
      # make sure we redistribute the config
3864
      self.cfg.Update(new_node, feedback_fn)
3865
      # and make sure the new node will not have old files around
3866
      if not new_node.master_candidate:
3867
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3868
        msg = result.fail_msg
3869
        if msg:
3870
          self.LogWarning("Node failed to demote itself from master"
3871
                          " candidate status: %s" % msg)
3872
    else:
3873
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3874
      self.context.AddNode(new_node, self.proc.GetECId())
3875

    
3876

    
3877
class LUSetNodeParams(LogicalUnit):
3878
  """Modifies the parameters of a node.
3879

3880
  """
3881
  HPATH = "node-modify"
3882
  HTYPE = constants.HTYPE_NODE
3883
  _OP_PARAMS = [
3884
    _PNodeName,
3885
    ("master_candidate", None, _TMaybeBool),
3886
    ("offline", None, _TMaybeBool),
3887
    ("drained", None, _TMaybeBool),
3888
    ("auto_promote", False, _TBool),
3889
    _PForce,
3890
    ]
3891
  REQ_BGL = False
3892

    
3893
  def CheckArguments(self):
3894
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3895
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3896
    if all_mods.count(None) == 3:
3897
      raise errors.OpPrereqError("Please pass at least one modification",
3898
                                 errors.ECODE_INVAL)
3899
    if all_mods.count(True) > 1:
3900
      raise errors.OpPrereqError("Can't set the node into more than one"
3901
                                 " state at the same time",
3902
                                 errors.ECODE_INVAL)
3903

    
3904
    # Boolean value that tells us whether we're offlining or draining the node
3905
    self.offline_or_drain = (self.op.offline == True or
3906
                             self.op.drained == True)
3907
    self.deoffline_or_drain = (self.op.offline == False or
3908
                               self.op.drained == False)
3909
    self.might_demote = (self.op.master_candidate == False or
3910
                         self.offline_or_drain)
3911

    
3912
    self.lock_all = self.op.auto_promote and self.might_demote
3913

    
3914

    
3915
  def ExpandNames(self):
3916
    if self.lock_all:
3917
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3918
    else:
3919
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3920

    
3921
  def BuildHooksEnv(self):
3922
    """Build hooks env.
3923

3924
    This runs on the master node.
3925

3926
    """
3927
    env = {
3928
      "OP_TARGET": self.op.node_name,
3929
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3930
      "OFFLINE": str(self.op.offline),
3931
      "DRAINED": str(self.op.drained),
3932
      }
3933
    nl = [self.cfg.GetMasterNode(),
3934
          self.op.node_name]
3935
    return env, nl, nl
3936

    
3937
  def CheckPrereq(self):
3938
    """Check prerequisites.
3939

3940
    This only checks the instance list against the existing names.
3941

3942
    """
3943
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3944

    
3945
    if (self.op.master_candidate is not None or
3946
        self.op.drained is not None or
3947
        self.op.offline is not None):
3948
      # we can't change the master's node flags
3949
      if self.op.node_name == self.cfg.GetMasterNode():
3950
        raise errors.OpPrereqError("The master role can be changed"
3951
                                   " only via masterfailover",
3952
                                   errors.ECODE_INVAL)
3953

    
3954

    
3955
    if node.master_candidate and self.might_demote and not self.lock_all:
3956
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3957
      # check if after removing the current node, we're missing master
3958
      # candidates
3959
      (mc_remaining, mc_should, _) = \
3960
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3961
      if mc_remaining < mc_should:
3962
        raise errors.OpPrereqError("Not enough master candidates, please"
3963
                                   " pass auto_promote to allow promotion",
3964
                                   errors.ECODE_INVAL)
3965

    
3966
    if (self.op.master_candidate == True and
3967
        ((node.offline and not self.op.offline == False) or
3968
         (node.drained and not self.op.drained == False))):
3969
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3970
                                 " to master_candidate" % node.name,
3971
                                 errors.ECODE_INVAL)
3972

    
3973
    # If we're being deofflined/drained, we'll MC ourself if needed
3974
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3975
        self.op.master_candidate == True and not node.master_candidate):
3976
      self.op.master_candidate = _DecideSelfPromotion(self)
3977
      if self.op.master_candidate:
3978
        self.LogInfo("Autopromoting node to master candidate")
3979

    
3980
    return
3981

    
3982
  def Exec(self, feedback_fn):
3983
    """Modifies a node.
3984

3985
    """
3986
    node = self.node
3987

    
3988
    result = []
3989
    changed_mc = False
3990

    
3991
    if self.op.offline is not None:
3992
      node.offline = self.op.offline
3993
      result.append(("offline", str(self.op.offline)))
3994
      if self.op.offline == True:
3995
        if node.master_candidate:
3996
          node.master_candidate = False
3997
          changed_mc = True
3998
          result.append(("master_candidate", "auto-demotion due to offline"))
3999
        if node.drained:
4000
          node.drained = False
4001
          result.append(("drained", "clear drained status due to offline"))
4002

    
4003
    if self.op.master_candidate is not None:
4004
      node.master_candidate = self.op.master_candidate
4005
      changed_mc = True
4006
      result.append(("master_candidate", str(self.op.master_candidate)))
4007
      if self.op.master_candidate == False:
4008
        rrc = self.rpc.call_node_demote_from_mc(node.name)
4009
        msg = rrc.fail_msg
4010
        if msg:
4011
          self.LogWarning("Node failed to demote itself: %s" % msg)
4012

    
4013
    if self.op.drained is not None:
4014
      node.drained = self.op.drained
4015
      result.append(("drained", str(self.op.drained)))
4016
      if self.op.drained == True:
4017
        if node.master_candidate:
4018
          node.master_candidate = False
4019
          changed_mc = True
4020
          result.append(("master_candidate", "auto-demotion due to drain"))
4021
          rrc = self.rpc.call_node_demote_from_mc(node.name)
4022
          msg = rrc.fail_msg
4023
          if msg:
4024
            self.LogWarning("Node failed to demote itself: %s" % msg)
4025
        if node.offline:
4026
          node.offline = False
4027
          result.append(("offline", "clear offline status due to drain"))
4028

    
4029
    # we locked all nodes, we adjust the CP before updating this node
4030
    if self.lock_all:
4031
      _AdjustCandidatePool(self, [node.name])
4032

    
4033
    # this will trigger configuration file update, if needed
4034
    self.cfg.Update(node, feedback_fn)
4035

    
4036
    # this will trigger job queue propagation or cleanup
4037
    if changed_mc:
4038
      self.context.ReaddNode(node)
4039

    
4040
    return result
4041

    
4042

    
4043
class LUPowercycleNode(NoHooksLU):
4044
  """Powercycles a node.
4045

4046
  """
4047
  _OP_PARAMS = [
4048
    _PNodeName,
4049
    _PForce,
4050
    ]
4051
  REQ_BGL = False
4052

    
4053
  def CheckArguments(self):
4054
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4055
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4056
      raise errors.OpPrereqError("The node is the master and the force"
4057
                                 " parameter was not set",
4058
                                 errors.ECODE_INVAL)
4059

    
4060
  def ExpandNames(self):
4061
    """Locking for PowercycleNode.
4062

4063
    This is a last-resort option and shouldn't block on other
4064
    jobs. Therefore, we grab no locks.
4065

4066
    """
4067
    self.needed_locks = {}
4068

    
4069
  def Exec(self, feedback_fn):
4070
    """Reboots a node.
4071

4072
    """
4073
    result = self.rpc.call_node_powercycle(self.op.node_name,
4074
                                           self.cfg.GetHypervisorType())
4075
    result.Raise("Failed to schedule the reboot")
4076
    return result.payload
4077

    
4078

    
4079
class LUQueryClusterInfo(NoHooksLU):
4080
  """Query cluster configuration.
4081

4082
  """
4083
  REQ_BGL = False
4084

    
4085
  def ExpandNames(self):
4086
    self.needed_locks = {}
4087

    
4088
  def Exec(self, feedback_fn):
4089
    """Return cluster config.
4090

4091
    """
4092
    cluster = self.cfg.GetClusterInfo()
4093
    os_hvp = {}
4094

    
4095
    # Filter just for enabled hypervisors
4096
    for os_name, hv_dict in cluster.os_hvp.items():
4097
      os_hvp[os_name] = {}
4098
      for hv_name, hv_params in hv_dict.items():
4099
        if hv_name in cluster.enabled_hypervisors:
4100
          os_hvp[os_name][hv_name] = hv_params
4101

    
4102
    result = {
4103
      "software_version": constants.RELEASE_VERSION,
4104
      "protocol_version": constants.PROTOCOL_VERSION,
4105
      "config_version": constants.CONFIG_VERSION,
4106
      "os_api_version": max(constants.OS_API_VERSIONS),
4107
      "export_version": constants.EXPORT_VERSION,
4108
      "architecture": (platform.architecture()[0], platform.machine()),
4109
      "name": cluster.cluster_name,
4110
      "master": cluster.master_node,
4111
      "default_hypervisor": cluster.enabled_hypervisors[0],
4112
      "enabled_hypervisors": cluster.enabled_hypervisors,
4113
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4114
                        for hypervisor_name in cluster.enabled_hypervisors]),
4115
      "os_hvp": os_hvp,
4116
      "beparams": cluster.beparams,
4117
      "osparams": cluster.osparams,
4118
      "nicparams": cluster.nicparams,
4119
      "candidate_pool_size": cluster.candidate_pool_size,
4120
      "master_netdev": cluster.master_netdev,
4121
      "volume_group_name": cluster.volume_group_name,
4122
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4123
      "file_storage_dir": cluster.file_storage_dir,
4124
      "maintain_node_health": cluster.maintain_node_health,
4125
      "ctime": cluster.ctime,
4126
      "mtime": cluster.mtime,
4127
      "uuid": cluster.uuid,
4128
      "tags": list(cluster.GetTags()),
4129
      "uid_pool": cluster.uid_pool,
4130
      "default_iallocator": cluster.default_iallocator,
4131
      }
4132

    
4133
    return result
4134

    
4135

    
4136
class LUQueryConfigValues(NoHooksLU):
4137
  """Return configuration values.
4138

4139
  """
4140
  _OP_PARAMS = [_POutputFields]
4141
  REQ_BGL = False
4142
  _FIELDS_DYNAMIC = utils.FieldSet()
4143
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4144
                                  "watcher_pause")
4145

    
4146
  def CheckArguments(self):
4147
    _CheckOutputFields(static=self._FIELDS_STATIC,
4148
                       dynamic=self._FIELDS_DYNAMIC,
4149
                       selected=self.op.output_fields)
4150

    
4151
  def ExpandNames(self):
4152
    self.needed_locks = {}
4153

    
4154
  def Exec(self, feedback_fn):
4155
    """Dump a representation of the cluster config to the standard output.
4156

4157
    """
4158
    values = []
4159
    for field in self.op.output_fields:
4160
      if field == "cluster_name":
4161
        entry = self.cfg.GetClusterName()
4162
      elif field == "master_node":
4163
        entry = self.cfg.GetMasterNode()
4164
      elif field == "drain_flag":
4165
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4166
      elif field == "watcher_pause":
4167
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4168
      else:
4169
        raise errors.ParameterError(field)
4170
      values.append(entry)
4171
    return values
4172

    
4173

    
4174
class LUActivateInstanceDisks(NoHooksLU):
4175
  """Bring up an instance's disks.
4176

4177
  """
4178
  _OP_PARAMS = [
4179
    _PInstanceName,
4180
    ("ignore_size", False, _TBool),
4181
    ]
4182
  REQ_BGL = False
4183

    
4184
  def ExpandNames(self):
4185
    self._ExpandAndLockInstance()
4186
    self.needed_locks[locking.LEVEL_NODE] = []
4187
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4188

    
4189
  def DeclareLocks(self, level):
4190
    if level == locking.LEVEL_NODE:
4191
      self._LockInstancesNodes()
4192

    
4193
  def CheckPrereq(self):
4194
    """Check prerequisites.
4195

4196
    This checks that the instance is in the cluster.
4197

4198
    """
4199
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4200
    assert self.instance is not None, \
4201
      "Cannot retrieve locked instance %s" % self.op.instance_name
4202
    _CheckNodeOnline(self, self.instance.primary_node)
4203

    
4204
  def Exec(self, feedback_fn):
4205
    """Activate the disks.
4206

4207
    """
4208
    disks_ok, disks_info = \
4209
              _AssembleInstanceDisks(self, self.instance,
4210
                                     ignore_size=self.op.ignore_size)
4211
    if not disks_ok:
4212
      raise errors.OpExecError("Cannot activate block devices")
4213

    
4214
    return disks_info
4215

    
4216

    
4217
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4218
                           ignore_size=False):
4219
  """Prepare the block devices for an instance.
4220

4221
  This sets up the block devices on all nodes.
4222

4223
  @type lu: L{LogicalUnit}
4224
  @param lu: the logical unit on whose behalf we execute
4225
  @type instance: L{objects.Instance}
4226
  @param instance: the instance for whose disks we assemble
4227
  @type disks: list of L{objects.Disk} or None
4228
  @param disks: which disks to assemble (or all, if None)
4229
  @type ignore_secondaries: boolean
4230
  @param ignore_secondaries: if true, errors on secondary nodes
4231
      won't result in an error return from the function
4232
  @type ignore_size: boolean
4233
  @param ignore_size: if true, the current known size of the disk
4234
      will not be used during the disk activation, useful for cases
4235
      when the size is wrong
4236
  @return: False if the operation failed, otherwise a list of
4237
      (host, instance_visible_name, node_visible_name)
4238
      with the mapping from node devices to instance devices
4239

4240
  """
4241
  device_info = []
4242
  disks_ok = True
4243
  iname = instance.name
4244
  disks = _ExpandCheckDisks(instance, disks)
4245

    
4246
  # With the two passes mechanism we try to reduce the window of
4247
  # opportunity for the race condition of switching DRBD to primary
4248
  # before handshaking occured, but we do not eliminate it
4249

    
4250
  # The proper fix would be to wait (with some limits) until the
4251
  # connection has been made and drbd transitions from WFConnection
4252
  # into any other network-connected state (Connected, SyncTarget,
4253
  # SyncSource, etc.)
4254

    
4255
  # 1st pass, assemble on all nodes in secondary mode
4256
  for inst_disk in disks:
4257
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4258
      if ignore_size:
4259
        node_disk = node_disk.Copy()
4260
        node_disk.UnsetSize()
4261
      lu.cfg.SetDiskID(node_disk, node)
4262
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4263
      msg = result.fail_msg
4264
      if msg:
4265
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4266
                           " (is_primary=False, pass=1): %s",
4267
                           inst_disk.iv_name, node, msg)
4268
        if not ignore_secondaries:
4269
          disks_ok = False
4270

    
4271
  # FIXME: race condition on drbd migration to primary
4272

    
4273
  # 2nd pass, do only the primary node
4274
  for inst_disk in disks:
4275
    dev_path = None
4276

    
4277
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4278
      if node != instance.primary_node:
4279
        continue
4280
      if ignore_size:
4281
        node_disk = node_disk.Copy()
4282
        node_disk.UnsetSize()
4283
      lu.cfg.SetDiskID(node_disk, node)
4284
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4285
      msg = result.fail_msg
4286
      if msg:
4287
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4288
                           " (is_primary=True, pass=2): %s",
4289
                           inst_disk.iv_name, node, msg)
4290
        disks_ok = False
4291
      else:
4292
        dev_path = result.payload
4293

    
4294
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4295

    
4296
  # leave the disks configured for the primary node
4297
  # this is a workaround that would be fixed better by
4298
  # improving the logical/physical id handling
4299
  for disk in disks:
4300
    lu.cfg.SetDiskID(disk, instance.primary_node)
4301

    
4302
  return disks_ok, device_info
4303

    
4304

    
4305
def _StartInstanceDisks(lu, instance, force):
4306
  """Start the disks of an instance.
4307

4308
  """
4309
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4310
                                           ignore_secondaries=force)
4311
  if not disks_ok:
4312
    _ShutdownInstanceDisks(lu, instance)
4313
    if force is not None and not force:
4314
      lu.proc.LogWarning("", hint="If the message above refers to a"
4315
                         " secondary node,"
4316
                         " you can retry the operation using '--force'.")
4317
    raise errors.OpExecError("Disk consistency error")
4318

    
4319

    
4320
class LUDeactivateInstanceDisks(NoHooksLU):
4321
  """Shutdown an instance's disks.
4322

4323
  """
4324
  _OP_PARAMS = [
4325
    _PInstanceName,
4326
    ]
4327
  REQ_BGL = False
4328

    
4329
  def ExpandNames(self):
4330
    self._ExpandAndLockInstance()
4331
    self.needed_locks[locking.LEVEL_NODE] = []
4332
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4333

    
4334
  def DeclareLocks(self, level):
4335
    if level == locking.LEVEL_NODE:
4336
      self._LockInstancesNodes()
4337

    
4338
  def CheckPrereq(self):
4339
    """Check prerequisites.
4340

4341
    This checks that the instance is in the cluster.
4342

4343
    """
4344
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4345
    assert self.instance is not None, \
4346
      "Cannot retrieve locked instance %s" % self.op.instance_name
4347

    
4348
  def Exec(self, feedback_fn):
4349
    """Deactivate the disks
4350

4351
    """
4352
    instance = self.instance
4353
    _SafeShutdownInstanceDisks(self, instance)
4354

    
4355

    
4356
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4357
  """Shutdown block devices of an instance.
4358

4359
  This function checks if an instance is running, before calling
4360
  _ShutdownInstanceDisks.
4361

4362
  """
4363
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4364
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4365

    
4366

    
4367
def _ExpandCheckDisks(instance, disks):
4368
  """Return the instance disks selected by the disks list
4369

4370
  @type disks: list of L{objects.Disk} or None
4371
  @param disks: selected disks
4372
  @rtype: list of L{objects.Disk}
4373
  @return: selected instance disks to act on
4374

4375
  """
4376
  if disks is None:
4377
    return instance.disks
4378
  else:
4379
    if not set(disks).issubset(instance.disks):
4380
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4381
                                   " target instance")
4382
    return disks
4383

    
4384

    
4385
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4386
  """Shutdown block devices of an instance.
4387

4388
  This does the shutdown on all nodes of the instance.
4389

4390
  If the ignore_primary is false, errors on the primary node are
4391
  ignored.
4392

4393
  """
4394
  all_result = True
4395
  disks = _ExpandCheckDisks(instance, disks)
4396

    
4397
  for disk in disks:
4398
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4399
      lu.cfg.SetDiskID(top_disk, node)
4400
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4401
      msg = result.fail_msg
4402
      if msg:
4403
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4404
                      disk.iv_name, node, msg)
4405
        if not ignore_primary or node != instance.primary_node:
4406
          all_result = False
4407
  return all_result
4408

    
4409

    
4410
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4411
  """Checks if a node has enough free memory.
4412

4413
  This function check if a given node has the needed amount of free
4414
  memory. In case the node has less memory or we cannot get the
4415
  information from the node, this function raise an OpPrereqError
4416
  exception.
4417

4418
  @type lu: C{LogicalUnit}
4419
  @param lu: a logical unit from which we get configuration data
4420
  @type node: C{str}
4421
  @param node: the node to check
4422
  @type reason: C{str}
4423
  @param reason: string to use in the error message
4424
  @type requested: C{int}
4425
  @param requested: the amount of memory in MiB to check for
4426
  @type hypervisor_name: C{str}
4427
  @param hypervisor_name: the hypervisor to ask for memory stats
4428
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4429
      we cannot check the node
4430

4431
  """
4432
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4433
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4434
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4435
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4436
  if not isinstance(free_mem, int):
4437
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4438
                               " was '%s'" % (node, free_mem),
4439
                               errors.ECODE_ENVIRON)
4440
  if requested > free_mem:
4441
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4442
                               " needed %s MiB, available %s MiB" %
4443
                               (node, reason, requested, free_mem),
4444
                               errors.ECODE_NORES)
4445

    
4446

    
4447
def _CheckNodesFreeDisk(lu, nodenames, requested):
4448
  """Checks if nodes have enough free disk space in the default VG.
4449

4450
  This function check if all given nodes have the needed amount of
4451
  free disk. In case any node has less disk or we cannot get the
4452
  information from the node, this function raise an OpPrereqError
4453
  exception.
4454

4455
  @type lu: C{LogicalUnit}
4456
  @param lu: a logical unit from which we get configuration data
4457
  @type nodenames: C{list}
4458
  @param nodenames: the list of node names to check
4459
  @type requested: C{int}
4460
  @param requested: the amount of disk in MiB to check for
4461
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4462
      we cannot check the node
4463

4464
  """
4465
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4466
                                   lu.cfg.GetHypervisorType())
4467
  for node in nodenames:
4468
    info = nodeinfo[node]
4469
    info.Raise("Cannot get current information from node %s" % node,
4470
               prereq=True, ecode=errors.ECODE_ENVIRON)
4471
    vg_free = info.payload.get("vg_free", None)
4472
    if not isinstance(vg_free, int):
4473
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4474
                                 " result was '%s'" % (node, vg_free),
4475
                                 errors.ECODE_ENVIRON)
4476
    if requested > vg_free:
4477
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4478
                                 " required %d MiB, available %d MiB" %
4479
                                 (node, requested, vg_free),
4480
                                 errors.ECODE_NORES)
4481

    
4482

    
4483
class LUStartupInstance(LogicalUnit):
4484
  """Starts an instance.
4485

4486
  """
4487
  HPATH = "instance-start"
4488
  HTYPE = constants.HTYPE_INSTANCE
4489
  _OP_PARAMS = [
4490
    _PInstanceName,
4491
    _PForce,
4492
    ("hvparams", _EmptyDict, _TDict),
4493
    ("beparams", _EmptyDict, _TDict),
4494
    ]
4495
  REQ_BGL = False
4496

    
4497
  def CheckArguments(self):
4498
    # extra beparams
4499
    if self.op.beparams:
4500
      # fill the beparams dict
4501
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4502

    
4503
  def ExpandNames(self):
4504
    self._ExpandAndLockInstance()
4505

    
4506
  def BuildHooksEnv(self):
4507
    """Build hooks env.
4508

4509
    This runs on master, primary and secondary nodes of the instance.
4510

4511
    """
4512
    env = {
4513
      "FORCE": self.op.force,
4514
      }
4515
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4516
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4517
    return env, nl, nl
4518

    
4519
  def CheckPrereq(self):
4520
    """Check prerequisites.
4521

4522
    This checks that the instance is in the cluster.
4523

4524
    """
4525
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4526
    assert self.instance is not None, \
4527
      "Cannot retrieve locked instance %s" % self.op.instance_name
4528

    
4529
    # extra hvparams
4530
    if self.op.hvparams:
4531
      # check hypervisor parameter syntax (locally)
4532
      cluster = self.cfg.GetClusterInfo()
4533
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4534
      filled_hvp = cluster.FillHV(instance)
4535
      filled_hvp.update(self.op.hvparams)
4536
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4537
      hv_type.CheckParameterSyntax(filled_hvp)
4538
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4539

    
4540
    _CheckNodeOnline(self, instance.primary_node)
4541

    
4542
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4543
    # check bridges existence
4544
    _CheckInstanceBridgesExist(self, instance)
4545

    
4546
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4547
                                              instance.name,
4548
                                              instance.hypervisor)
4549
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4550
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4551
    if not remote_info.payload: # not running already
4552
      _CheckNodeFreeMemory(self, instance.primary_node,
4553
                           "starting instance %s" % instance.name,
4554
                           bep[constants.BE_MEMORY], instance.hypervisor)
4555

    
4556
  def Exec(self, feedback_fn):
4557
    """Start the instance.
4558

4559
    """
4560
    instance = self.instance
4561
    force = self.op.force
4562

    
4563
    self.cfg.MarkInstanceUp(instance.name)
4564

    
4565
    node_current = instance.primary_node
4566

    
4567
    _StartInstanceDisks(self, instance, force)
4568

    
4569
    result = self.rpc.call_instance_start(node_current, instance,
4570
                                          self.op.hvparams, self.op.beparams)
4571
    msg = result.fail_msg
4572
    if msg:
4573
      _ShutdownInstanceDisks(self, instance)
4574
      raise errors.OpExecError("Could not start instance: %s" % msg)
4575

    
4576

    
4577
class LURebootInstance(LogicalUnit):
4578
  """Reboot an instance.
4579

4580
  """
4581
  HPATH = "instance-reboot"
4582
  HTYPE = constants.HTYPE_INSTANCE
4583
  _OP_PARAMS = [
4584
    _PInstanceName,
4585
    ("ignore_secondaries", False, _TBool),
4586
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4587
    _PShutdownTimeout,
4588
    ]
4589
  REQ_BGL = False
4590

    
4591
  def ExpandNames(self):
4592
    self._ExpandAndLockInstance()
4593

    
4594
  def BuildHooksEnv(self):
4595
    """Build hooks env.
4596

4597
    This runs on master, primary and secondary nodes of the instance.
4598

4599
    """
4600
    env = {
4601
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4602
      "REBOOT_TYPE": self.op.reboot_type,
4603
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4604
      }
4605
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4606
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4607
    return env, nl, nl
4608

    
4609
  def CheckPrereq(self):
4610
    """Check prerequisites.
4611

4612
    This checks that the instance is in the cluster.
4613

4614
    """
4615
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4616
    assert self.instance is not None, \
4617
      "Cannot retrieve locked instance %s" % self.op.instance_name
4618

    
4619
    _CheckNodeOnline(self, instance.primary_node)
4620

    
4621
    # check bridges existence
4622
    _CheckInstanceBridgesExist(self, instance)
4623

    
4624
  def Exec(self, feedback_fn):
4625
    """Reboot the instance.
4626

4627
    """
4628
    instance = self.instance
4629
    ignore_secondaries = self.op.ignore_secondaries
4630
    reboot_type = self.op.reboot_type
4631

    
4632
    node_current = instance.primary_node
4633

    
4634
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4635
                       constants.INSTANCE_REBOOT_HARD]:
4636
      for disk in instance.disks:
4637
        self.cfg.SetDiskID(disk, node_current)
4638
      result = self.rpc.call_instance_reboot(node_current, instance,
4639
                                             reboot_type,
4640
                                             self.op.shutdown_timeout)
4641
      result.Raise("Could not reboot instance")
4642
    else:
4643
      result = self.rpc.call_instance_shutdown(node_current, instance,
4644
                                               self.op.shutdown_timeout)
4645
      result.Raise("Could not shutdown instance for full reboot")
4646
      _ShutdownInstanceDisks(self, instance)
4647
      _StartInstanceDisks(self, instance, ignore_secondaries)
4648
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4649
      msg = result.fail_msg
4650
      if msg:
4651
        _ShutdownInstanceDisks(self, instance)
4652
        raise errors.OpExecError("Could not start instance for"
4653
                                 " full reboot: %s" % msg)
4654

    
4655
    self.cfg.MarkInstanceUp(instance.name)
4656

    
4657

    
4658
class LUShutdownInstance(LogicalUnit):
4659
  """Shutdown an instance.
4660

4661
  """
4662
  HPATH = "instance-stop"
4663
  HTYPE = constants.HTYPE_INSTANCE
4664
  _OP_PARAMS = [
4665
    _PInstanceName,
4666
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4667
    ]
4668
  REQ_BGL = False
4669

    
4670
  def ExpandNames(self):
4671
    self._ExpandAndLockInstance()
4672

    
4673
  def BuildHooksEnv(self):
4674
    """Build hooks env.
4675

4676
    This runs on master, primary and secondary nodes of the instance.
4677

4678
    """
4679
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4680
    env["TIMEOUT"] = self.op.timeout
4681
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4682
    return env, nl, nl
4683

    
4684
  def CheckPrereq(self):
4685
    """Check prerequisites.
4686

4687
    This checks that the instance is in the cluster.
4688

4689
    """
4690
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4691
    assert self.instance is not None, \
4692
      "Cannot retrieve locked instance %s" % self.op.instance_name
4693
    _CheckNodeOnline(self, self.instance.primary_node)
4694

    
4695
  def Exec(self, feedback_fn):
4696
    """Shutdown the instance.
4697

4698
    """
4699
    instance = self.instance
4700
    node_current = instance.primary_node
4701
    timeout = self.op.timeout
4702
    self.cfg.MarkInstanceDown(instance.name)
4703
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4704
    msg = result.fail_msg
4705
    if msg:
4706
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4707

    
4708
    _ShutdownInstanceDisks(self, instance)
4709

    
4710

    
4711
class LUReinstallInstance(LogicalUnit):
4712
  """Reinstall an instance.
4713

4714
  """
4715
  HPATH = "instance-reinstall"
4716
  HTYPE = constants.HTYPE_INSTANCE
4717
  _OP_PARAMS = [
4718
    _PInstanceName,
4719
    ("os_type", None, _TMaybeString),
4720
    ("force_variant", False, _TBool),
4721
    ]
4722
  REQ_BGL = False
4723

    
4724
  def ExpandNames(self):
4725
    self._ExpandAndLockInstance()
4726

    
4727
  def BuildHooksEnv(self):
4728
    """Build hooks env.
4729

4730
    This runs on master, primary and secondary nodes of the instance.
4731

4732
    """
4733
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4734
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4735
    return env, nl, nl
4736

    
4737
  def CheckPrereq(self):
4738
    """Check prerequisites.
4739

4740
    This checks that the instance is in the cluster and is not running.
4741

4742
    """
4743
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4744
    assert instance is not None, \
4745
      "Cannot retrieve locked instance %s" % self.op.instance_name
4746
    _CheckNodeOnline(self, instance.primary_node)
4747

    
4748
    if instance.disk_template == constants.DT_DISKLESS:
4749
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4750
                                 self.op.instance_name,
4751
                                 errors.ECODE_INVAL)
4752
    _CheckInstanceDown(self, instance, "cannot reinstall")
4753

    
4754
    if self.op.os_type is not None:
4755
      # OS verification
4756
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4757
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4758

    
4759
    self.instance = instance
4760

    
4761
  def Exec(self, feedback_fn):
4762
    """Reinstall the instance.
4763

4764
    """
4765
    inst = self.instance
4766

    
4767
    if self.op.os_type is not None:
4768
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4769
      inst.os = self.op.os_type
4770
      self.cfg.Update(inst, feedback_fn)
4771

    
4772
    _StartInstanceDisks(self, inst, None)
4773
    try:
4774
      feedback_fn("Running the instance OS create scripts...")
4775
      # FIXME: pass debug option from opcode to backend
4776
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4777
                                             self.op.debug_level)
4778
      result.Raise("Could not install OS for instance %s on node %s" %
4779
                   (inst.name, inst.primary_node))
4780
    finally:
4781
      _ShutdownInstanceDisks(self, inst)
4782

    
4783

    
4784
class LURecreateInstanceDisks(LogicalUnit):
4785
  """Recreate an instance's missing disks.
4786

4787
  """
4788
  HPATH = "instance-recreate-disks"
4789
  HTYPE = constants.HTYPE_INSTANCE
4790
  _OP_PARAMS = [
4791
    _PInstanceName,
4792
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4793
    ]
4794
  REQ_BGL = False
4795

    
4796
  def ExpandNames(self):
4797
    self._ExpandAndLockInstance()
4798

    
4799
  def BuildHooksEnv(self):
4800
    """Build hooks env.
4801

4802
    This runs on master, primary and secondary nodes of the instance.
4803

4804
    """
4805
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4806
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4807
    return env, nl, nl
4808

    
4809
  def CheckPrereq(self):
4810
    """Check prerequisites.
4811

4812
    This checks that the instance is in the cluster and is not running.
4813

4814
    """
4815
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4816
    assert instance is not None, \
4817
      "Cannot retrieve locked instance %s" % self.op.instance_name
4818
    _CheckNodeOnline(self, instance.primary_node)
4819

    
4820
    if instance.disk_template == constants.DT_DISKLESS:
4821
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4822
                                 self.op.instance_name, errors.ECODE_INVAL)
4823
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4824

    
4825
    if not self.op.disks:
4826
      self.op.disks = range(len(instance.disks))
4827
    else:
4828
      for idx in self.op.disks:
4829
        if idx >= len(instance.disks):
4830
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4831
                                     errors.ECODE_INVAL)
4832

    
4833
    self.instance = instance
4834

    
4835
  def Exec(self, feedback_fn):
4836
    """Recreate the disks.
4837

4838
    """
4839
    to_skip = []
4840
    for idx, _ in enumerate(self.instance.disks):
4841
      if idx not in self.op.disks: # disk idx has not been passed in
4842
        to_skip.append(idx)
4843
        continue
4844

    
4845
    _CreateDisks(self, self.instance, to_skip=to_skip)
4846

    
4847

    
4848
class LURenameInstance(LogicalUnit):
4849
  """Rename an instance.
4850

4851
  """
4852
  HPATH = "instance-rename"
4853
  HTYPE = constants.HTYPE_INSTANCE
4854
  _OP_PARAMS = [
4855
    _PInstanceName,
4856
    ("new_name", _NoDefault, _TNonEmptyString),
4857
    ("ignore_ip", False, _TBool),
4858
    ("check_name", True, _TBool),
4859
    ]
4860

    
4861
  def BuildHooksEnv(self):
4862
    """Build hooks env.
4863

4864
    This runs on master, primary and secondary nodes of the instance.
4865

4866
    """
4867
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4868
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4869
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4870
    return env, nl, nl
4871

    
4872
  def CheckPrereq(self):
4873
    """Check prerequisites.
4874

4875
    This checks that the instance is in the cluster and is not running.
4876

4877
    """
4878
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4879
                                                self.op.instance_name)
4880
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4881
    assert instance is not None
4882
    _CheckNodeOnline(self, instance.primary_node)
4883
    _CheckInstanceDown(self, instance, "cannot rename")
4884
    self.instance = instance
4885

    
4886
    # new name verification
4887
    if self.op.check_name:
4888
      name_info = utils.GetHostInfo(self.op.new_name)
4889
      self.op.new_name = name_info.name
4890

    
4891
    new_name = self.op.new_name
4892

    
4893
    instance_list = self.cfg.GetInstanceList()
4894
    if new_name in instance_list:
4895
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4896
                                 new_name, errors.ECODE_EXISTS)
4897

    
4898
    if not self.op.ignore_ip:
4899
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4900
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4901
                                   (name_info.ip, new_name),
4902
                                   errors.ECODE_NOTUNIQUE)
4903

    
4904
  def Exec(self, feedback_fn):
4905
    """Reinstall the instance.
4906

4907
    """
4908
    inst = self.instance
4909
    old_name = inst.name
4910

    
4911
    if inst.disk_template == constants.DT_FILE:
4912
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4913

    
4914
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4915
    # Change the instance lock. This is definitely safe while we hold the BGL
4916
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4917
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4918

    
4919
    # re-read the instance from the configuration after rename
4920
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4921

    
4922
    if inst.disk_template == constants.DT_FILE:
4923
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4924
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4925
                                                     old_file_storage_dir,
4926
                                                     new_file_storage_dir)
4927
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4928
                   " (but the instance has been renamed in Ganeti)" %
4929
                   (inst.primary_node, old_file_storage_dir,
4930
                    new_file_storage_dir))
4931

    
4932
    _StartInstanceDisks(self, inst, None)
4933
    try:
4934
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4935
                                                 old_name, self.op.debug_level)
4936
      msg = result.fail_msg
4937
      if msg:
4938
        msg = ("Could not run OS rename script for instance %s on node %s"
4939
               " (but the instance has been renamed in Ganeti): %s" %
4940
               (inst.name, inst.primary_node, msg))
4941
        self.proc.LogWarning(msg)
4942
    finally:
4943
      _ShutdownInstanceDisks(self, inst)
4944

    
4945

    
4946
class LURemoveInstance(LogicalUnit):
4947
  """Remove an instance.
4948

4949
  """
4950
  HPATH = "instance-remove"
4951
  HTYPE = constants.HTYPE_INSTANCE
4952
  _OP_PARAMS = [
4953
    _PInstanceName,
4954
    ("ignore_failures", False, _TBool),
4955
    _PShutdownTimeout,
4956
    ]
4957
  REQ_BGL = False
4958

    
4959
  def ExpandNames(self):
4960
    self._ExpandAndLockInstance()
4961
    self.needed_locks[locking.LEVEL_NODE] = []
4962
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4963

    
4964
  def DeclareLocks(self, level):
4965
    if level == locking.LEVEL_NODE:
4966
      self._LockInstancesNodes()
4967

    
4968
  def BuildHooksEnv(self):
4969
    """Build hooks env.
4970

4971
    This runs on master, primary and secondary nodes of the instance.
4972

4973
    """
4974
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4975
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4976
    nl = [self.cfg.GetMasterNode()]
4977
    nl_post = list(self.instance.all_nodes) + nl
4978
    return env, nl, nl_post
4979

    
4980
  def CheckPrereq(self):
4981
    """Check prerequisites.
4982

4983
    This checks that the instance is in the cluster.
4984

4985
    """
4986
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4987
    assert self.instance is not None, \
4988
      "Cannot retrieve locked instance %s" % self.op.instance_name
4989

    
4990
  def Exec(self, feedback_fn):
4991
    """Remove the instance.
4992

4993
    """
4994
    instance = self.instance
4995
    logging.info("Shutting down instance %s on node %s",
4996
                 instance.name, instance.primary_node)
4997

    
4998
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4999
                                             self.op.shutdown_timeout)
5000
    msg = result.fail_msg
5001
    if msg:
5002
      if self.op.ignore_failures:
5003
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5004
      else:
5005
        raise errors.OpExecError("Could not shutdown instance %s on"
5006
                                 " node %s: %s" %
5007
                                 (instance.name, instance.primary_node, msg))
5008

    
5009
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5010

    
5011

    
5012
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5013
  """Utility function to remove an instance.
5014

5015
  """
5016
  logging.info("Removing block devices for instance %s", instance.name)
5017

    
5018
  if not _RemoveDisks(lu, instance):
5019
    if not ignore_failures:
5020
      raise errors.OpExecError("Can't remove instance's disks")
5021
    feedback_fn("Warning: can't remove instance's disks")
5022

    
5023
  logging.info("Removing instance %s out of cluster config", instance.name)
5024

    
5025
  lu.cfg.RemoveInstance(instance.name)
5026

    
5027
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5028
    "Instance lock removal conflict"
5029

    
5030
  # Remove lock for the instance
5031
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5032

    
5033

    
5034
class LUQueryInstances(NoHooksLU):
5035
  """Logical unit for querying instances.
5036

5037
  """
5038
  # pylint: disable-msg=W0142
5039
  _OP_PARAMS = [
5040
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5041
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5042
    ("use_locking", False, _TBool),
5043
    ]
5044
  REQ_BGL = False
5045
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5046
                    "serial_no", "ctime", "mtime", "uuid"]
5047
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5048
                                    "admin_state",
5049
                                    "disk_template", "ip", "mac", "bridge",
5050
                                    "nic_mode", "nic_link",
5051
                                    "sda_size", "sdb_size", "vcpus", "tags",
5052
                                    "network_port", "beparams",
5053
                                    r"(disk)\.(size)/([0-9]+)",
5054
                                    r"(disk)\.(sizes)", "disk_usage",
5055
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5056
                                    r"(nic)\.(bridge)/([0-9]+)",
5057
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5058
                                    r"(disk|nic)\.(count)",
5059
                                    "hvparams",
5060
                                    ] + _SIMPLE_FIELDS +
5061
                                  ["hv/%s" % name
5062
                                   for name in constants.HVS_PARAMETERS
5063
                                   if name not in constants.HVC_GLOBALS] +
5064
                                  ["be/%s" % name
5065
                                   for name in constants.BES_PARAMETERS])
5066
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
5067

    
5068

    
5069
  def CheckArguments(self):
5070
    _CheckOutputFields(static=self._FIELDS_STATIC,
5071
                       dynamic=self._FIELDS_DYNAMIC,
5072
                       selected=self.op.output_fields)
5073

    
5074
  def ExpandNames(self):
5075
    self.needed_locks = {}
5076
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5077
    self.share_locks[locking.LEVEL_NODE] = 1
5078

    
5079
    if self.op.names:
5080
      self.wanted = _GetWantedInstances(self, self.op.names)
5081
    else:
5082
      self.wanted = locking.ALL_SET
5083

    
5084
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5085
    self.do_locking = self.do_node_query and self.op.use_locking
5086
    if self.do_locking:
5087
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5088
      self.needed_locks[locking.LEVEL_NODE] = []
5089
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5090

    
5091
  def DeclareLocks(self, level):
5092
    if level == locking.LEVEL_NODE and self.do_locking:
5093
      self._LockInstancesNodes()
5094

    
5095
  def Exec(self, feedback_fn):
5096
    """Computes the list of nodes and their attributes.
5097

5098
    """
5099
    # pylint: disable-msg=R0912
5100
    # way too many branches here
5101
    all_info = self.cfg.GetAllInstancesInfo()
5102
    if self.wanted == locking.ALL_SET:
5103
      # caller didn't specify instance names, so ordering is not important
5104
      if self.do_locking:
5105
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5106
      else:
5107
        instance_names = all_info.keys()
5108
      instance_names = utils.NiceSort(instance_names)
5109
    else:
5110
      # caller did specify names, so we must keep the ordering
5111
      if self.do_locking:
5112
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5113
      else:
5114
        tgt_set = all_info.keys()
5115
      missing = set(self.wanted).difference(tgt_set)
5116
      if missing:
5117
        raise errors.OpExecError("Some instances were removed before"
5118
                                 " retrieving their data: %s" % missing)
5119
      instance_names = self.wanted
5120

    
5121
    instance_list = [all_info[iname] for iname in instance_names]
5122

    
5123
    # begin data gathering
5124

    
5125
    nodes = frozenset([inst.primary_node for inst in instance_list])
5126
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5127

    
5128
    bad_nodes = []
5129
    off_nodes = []
5130
    if self.do_node_query:
5131
      live_data = {}
5132
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5133
      for name in nodes:
5134
        result = node_data[name]
5135
        if result.offline:
5136
          # offline nodes will be in both lists
5137
          off_nodes.append(name)
5138
        if result.fail_msg:
5139
          bad_nodes.append(name)
5140
        else:
5141
          if result.payload:
5142
            live_data.update(result.payload)
5143
          # else no instance is alive
5144
    else:
5145
      live_data = dict([(name, {}) for name in instance_names])
5146

    
5147
    # end data gathering
5148

    
5149
    HVPREFIX = "hv/"
5150
    BEPREFIX = "be/"
5151
    output = []
5152
    cluster = self.cfg.GetClusterInfo()
5153
    for instance in instance_list:
5154
      iout = []
5155
      i_hv = cluster.FillHV(instance, skip_globals=True)
5156
      i_be = cluster.FillBE(instance)
5157
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5158
      for field in self.op.output_fields:
5159
        st_match = self._FIELDS_STATIC.Matches(field)
5160
        if field in self._SIMPLE_FIELDS:
5161
          val = getattr(instance, field)
5162
        elif field == "pnode":
5163
          val = instance.primary_node
5164
        elif field == "snodes":
5165
          val = list(instance.secondary_nodes)
5166
        elif field == "admin_state":
5167
          val = instance.admin_up
5168
        elif field == "oper_state":
5169
          if instance.primary_node in bad_nodes:
5170
            val = None
5171
          else:
5172
            val = bool(live_data.get(instance.name))
5173
        elif field == "status":
5174
          if instance.primary_node in off_nodes:
5175
            val = "ERROR_nodeoffline"
5176
          elif instance.primary_node in bad_nodes:
5177
            val = "ERROR_nodedown"
5178
          else:
5179
            running = bool(live_data.get(instance.name))
5180
            if running:
5181
              if instance.admin_up:
5182
                val = "running"
5183
              else:
5184
                val = "ERROR_up"
5185
            else:
5186
              if instance.admin_up:
5187
                val = "ERROR_down"
5188
              else:
5189
                val = "ADMIN_down"
5190
        elif field == "oper_ram":
5191
          if instance.primary_node in bad_nodes:
5192
            val = None
5193
          elif instance.name in live_data:
5194
            val = live_data[instance.name].get("memory", "?")
5195
          else:
5196
            val = "-"
5197
        elif field == "vcpus":
5198
          val = i_be[constants.BE_VCPUS]
5199
        elif field == "disk_template":
5200
          val = instance.disk_template
5201
        elif field == "ip":
5202
          if instance.nics:
5203
            val = instance.nics[0].ip
5204
          else:
5205
            val = None
5206
        elif field == "nic_mode":
5207
          if instance.nics:
5208
            val = i_nicp[0][constants.NIC_MODE]
5209
          else:
5210
            val = None
5211
        elif field == "nic_link":
5212
          if instance.nics:
5213
            val = i_nicp[0][constants.NIC_LINK]
5214
          else:
5215
            val = None
5216
        elif field == "bridge":
5217
          if (instance.nics and
5218
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5219
            val = i_nicp[0][constants.NIC_LINK]
5220
          else:
5221
            val = None
5222
        elif field == "mac":
5223
          if instance.nics:
5224
            val = instance.nics[0].mac
5225
          else:
5226
            val = None
5227
        elif field == "sda_size" or field == "sdb_size":
5228
          idx = ord(field[2]) - ord('a')
5229
          try:
5230
            val = instance.FindDisk(idx).size
5231
          except errors.OpPrereqError:
5232
            val = None
5233
        elif field == "disk_usage": # total disk usage per node
5234
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5235
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5236
        elif field == "tags":
5237
          val = list(instance.GetTags())
5238
        elif field == "hvparams":
5239
          val = i_hv
5240
        elif (field.startswith(HVPREFIX) and
5241
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5242
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5243
          val = i_hv.get(field[len(HVPREFIX):], None)
5244
        elif field == "beparams":
5245
          val = i_be
5246
        elif (field.startswith(BEPREFIX) and
5247
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5248
          val = i_be.get(field[len(BEPREFIX):], None)
5249
        elif st_match and st_match.groups():
5250
          # matches a variable list
5251
          st_groups = st_match.groups()
5252
          if st_groups and st_groups[0] == "disk":
5253
            if st_groups[1] == "count":
5254
              val = len(instance.disks)
5255
            elif st_groups[1] == "sizes":
5256
              val = [disk.size for disk in instance.disks]
5257
            elif st_groups[1] == "size":
5258
              try:
5259
                val = instance.FindDisk(st_groups[2]).size
5260
              except errors.OpPrereqError:
5261
                val = None
5262
            else:
5263
              assert False, "Unhandled disk parameter"
5264
          elif st_groups[0] == "nic":
5265
            if st_groups[1] == "count":
5266
              val = len(instance.nics)
5267
            elif st_groups[1] == "macs":
5268
              val = [nic.mac for nic in instance.nics]
5269
            elif st_groups[1] == "ips":
5270
              val = [nic.ip for nic in instance.nics]
5271
            elif st_groups[1] == "modes":
5272
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5273
            elif st_groups[1] == "links":
5274
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5275
            elif st_groups[1] == "bridges":
5276
              val = []
5277
              for nicp in i_nicp:
5278
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5279
                  val.append(nicp[constants.NIC_LINK])
5280
                else:
5281
                  val.append(None)
5282
            else:
5283
              # index-based item
5284
              nic_idx = int(st_groups[2])
5285
              if nic_idx >= len(instance.nics):
5286
                val = None
5287
              else:
5288
                if st_groups[1] == "mac":
5289
                  val = instance.nics[nic_idx].mac
5290
                elif st_groups[1] == "ip":
5291
                  val = instance.nics[nic_idx].ip
5292
                elif st_groups[1] == "mode":
5293
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5294
                elif st_groups[1] == "link":
5295
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5296
                elif st_groups[1] == "bridge":
5297
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5298
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5299
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5300
                  else:
5301
                    val = None
5302
                else:
5303
                  assert False, "Unhandled NIC parameter"
5304
          else:
5305
            assert False, ("Declared but unhandled variable parameter '%s'" %
5306
                           field)
5307
        else:
5308
          assert False, "Declared but unhandled parameter '%s'" % field
5309
        iout.append(val)
5310
      output.append(iout)
5311

    
5312
    return output
5313

    
5314

    
5315
class LUFailoverInstance(LogicalUnit):
5316
  """Failover an instance.
5317

5318
  """
5319
  HPATH = "instance-failover"
5320
  HTYPE = constants.HTYPE_INSTANCE
5321
  _OP_PARAMS = [
5322
    _PInstanceName,
5323
    ("ignore_consistency", False, _TBool),
5324
    _PShutdownTimeout,
5325
    ]
5326
  REQ_BGL = False
5327

    
5328
  def ExpandNames(self):
5329
    self._ExpandAndLockInstance()
5330
    self.needed_locks[locking.LEVEL_NODE] = []
5331
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5332

    
5333
  def DeclareLocks(self, level):
5334
    if level == locking.LEVEL_NODE:
5335
      self._LockInstancesNodes()
5336

    
5337
  def BuildHooksEnv(self):
5338
    """Build hooks env.
5339

5340
    This runs on master, primary and secondary nodes of the instance.
5341

5342
    """
5343
    instance = self.instance
5344
    source_node = instance.primary_node
5345
    target_node = instance.secondary_nodes[0]
5346
    env = {
5347
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5348
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5349
      "OLD_PRIMARY": source_node,
5350
      "OLD_SECONDARY": target_node,
5351
      "NEW_PRIMARY": target_node,
5352
      "NEW_SECONDARY": source_node,
5353
      }
5354
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5355
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5356
    nl_post = list(nl)
5357
    nl_post.append(source_node)
5358
    return env, nl, nl_post
5359

    
5360
  def CheckPrereq(self):
5361
    """Check prerequisites.
5362

5363
    This checks that the instance is in the cluster.
5364

5365
    """
5366
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5367
    assert self.instance is not None, \
5368
      "Cannot retrieve locked instance %s" % self.op.instance_name
5369

    
5370
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5371
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5372
      raise errors.OpPrereqError("Instance's disk layout is not"
5373
                                 " network mirrored, cannot failover.",
5374
                                 errors.ECODE_STATE)
5375

    
5376
    secondary_nodes = instance.secondary_nodes
5377
    if not secondary_nodes:
5378
      raise errors.ProgrammerError("no secondary node but using "
5379
                                   "a mirrored disk template")
5380

    
5381
    target_node = secondary_nodes[0]
5382
    _CheckNodeOnline(self, target_node)
5383
    _CheckNodeNotDrained(self, target_node)
5384
    if instance.admin_up:
5385
      # check memory requirements on the secondary node
5386
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5387
                           instance.name, bep[constants.BE_MEMORY],
5388
                           instance.hypervisor)
5389
    else:
5390
      self.LogInfo("Not checking memory on the secondary node as"
5391
                   " instance will not be started")
5392

    
5393
    # check bridge existance
5394
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5395

    
5396
  def Exec(self, feedback_fn):
5397
    """Failover an instance.
5398

5399
    The failover is done by shutting it down on its present node and
5400
    starting it on the secondary.
5401

5402
    """
5403
    instance = self.instance
5404

    
5405
    source_node = instance.primary_node
5406
    target_node = instance.secondary_nodes[0]
5407

    
5408
    if instance.admin_up:
5409
      feedback_fn("* checking disk consistency between source and target")
5410
      for dev in instance.disks:
5411
        # for drbd, these are drbd over lvm
5412
        if not _CheckDiskConsistency(self, dev, target_node, False):
5413
          if not self.op.ignore_consistency:
5414
            raise errors.OpExecError("Disk %s is degraded on target node,"
5415
                                     " aborting failover." % dev.iv_name)
5416
    else:
5417
      feedback_fn("* not checking disk consistency as instance is not running")
5418

    
5419
    feedback_fn("* shutting down instance on source node")
5420
    logging.info("Shutting down instance %s on node %s",
5421
                 instance.name, source_node)
5422

    
5423
    result = self.rpc.call_instance_shutdown(source_node, instance,
5424
                                             self.op.shutdown_timeout)
5425
    msg = result.fail_msg
5426
    if msg:
5427
      if self.op.ignore_consistency:
5428
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5429
                             " Proceeding anyway. Please make sure node"
5430
                             " %s is down. Error details: %s",
5431
                             instance.name, source_node, source_node, msg)
5432
      else:
5433
        raise errors.OpExecError("Could not shutdown instance %s on"
5434
                                 " node %s: %s" %
5435
                                 (instance.name, source_node, msg))
5436

    
5437
    feedback_fn("* deactivating the instance's disks on source node")
5438
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5439
      raise errors.OpExecError("Can't shut down the instance's disks.")
5440

    
5441
    instance.primary_node = target_node
5442
    # distribute new instance config to the other nodes
5443
    self.cfg.Update(instance, feedback_fn)
5444

    
5445
    # Only start the instance if it's marked as up
5446
    if instance.admin_up:
5447
      feedback_fn("* activating the instance's disks on target node")
5448
      logging.info("Starting instance %s on node %s",
5449
                   instance.name, target_node)
5450

    
5451
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5452
                                           ignore_secondaries=True)
5453
      if not disks_ok:
5454
        _ShutdownInstanceDisks(self, instance)
5455
        raise errors.OpExecError("Can't activate the instance's disks")
5456

    
5457
      feedback_fn("* starting the instance on the target node")
5458
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5459
      msg = result.fail_msg
5460
      if msg:
5461
        _ShutdownInstanceDisks(self, instance)
5462
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5463
                                 (instance.name, target_node, msg))
5464

    
5465

    
5466
class LUMigrateInstance(LogicalUnit):
5467
  """Migrate an instance.
5468

5469
  This is migration without shutting down, compared to the failover,
5470
  which is done with shutdown.
5471

5472
  """
5473
  HPATH = "instance-migrate"
5474
  HTYPE = constants.HTYPE_INSTANCE
5475
  _OP_PARAMS = [
5476
    _PInstanceName,
5477
    ("live", True, _TBool),
5478
    ("cleanup", False, _TBool),
5479
    ]
5480

    
5481
  REQ_BGL = False
5482

    
5483
  def ExpandNames(self):
5484
    self._ExpandAndLockInstance()
5485

    
5486
    self.needed_locks[locking.LEVEL_NODE] = []
5487
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5488

    
5489
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5490
                                       self.op.live, self.op.cleanup)
5491
    self.tasklets = [self._migrater]
5492

    
5493
  def DeclareLocks(self, level):
5494
    if level == locking.LEVEL_NODE:
5495
      self._LockInstancesNodes()
5496

    
5497
  def BuildHooksEnv(self):
5498
    """Build hooks env.
5499

5500
    This runs on master, primary and secondary nodes of the instance.
5501

5502
    """
5503
    instance = self._migrater.instance
5504
    source_node = instance.primary_node
5505
    target_node = instance.secondary_nodes[0]
5506
    env = _BuildInstanceHookEnvByObject(self, instance)
5507
    env["MIGRATE_LIVE"] = self.op.live
5508
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5509
    env.update({
5510
        "OLD_PRIMARY": source_node,
5511
        "OLD_SECONDARY": target_node,
5512
        "NEW_PRIMARY": target_node,
5513
        "NEW_SECONDARY": source_node,
5514
        })
5515
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5516
    nl_post = list(nl)
5517
    nl_post.append(source_node)
5518
    return env, nl, nl_post
5519

    
5520

    
5521
class LUMoveInstance(LogicalUnit):
5522
  """Move an instance by data-copying.
5523

5524
  """
5525
  HPATH = "instance-move"
5526
  HTYPE = constants.HTYPE_INSTANCE
5527
  _OP_PARAMS = [
5528
    _PInstanceName,
5529
    ("target_node", _NoDefault, _TNonEmptyString),
5530
    _PShutdownTimeout,
5531
    ]
5532
  REQ_BGL = False
5533

    
5534
  def ExpandNames(self):
5535
    self._ExpandAndLockInstance()
5536
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5537
    self.op.target_node = target_node
5538
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5539
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5540

    
5541
  def DeclareLocks(self, level):
5542
    if level == locking.LEVEL_NODE:
5543
      self._LockInstancesNodes(primary_only=True)
5544

    
5545
  def BuildHooksEnv(self):
5546
    """Build hooks env.
5547

5548
    This runs on master, primary and secondary nodes of the instance.
5549

5550
    """
5551
    env = {
5552
      "TARGET_NODE": self.op.target_node,
5553
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5554
      }
5555
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5556
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5557
                                       self.op.target_node]
5558
    return env, nl, nl
5559

    
5560
  def CheckPrereq(self):
5561
    """Check prerequisites.
5562

5563
    This checks that the instance is in the cluster.
5564

5565
    """
5566
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5567
    assert self.instance is not None, \
5568
      "Cannot retrieve locked instance %s" % self.op.instance_name
5569

    
5570
    node = self.cfg.GetNodeInfo(self.op.target_node)
5571
    assert node is not None, \
5572
      "Cannot retrieve locked node %s" % self.op.target_node
5573

    
5574
    self.target_node = target_node = node.name
5575

    
5576
    if target_node == instance.primary_node:
5577
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5578
                                 (instance.name, target_node),
5579
                                 errors.ECODE_STATE)
5580

    
5581
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5582

    
5583
    for idx, dsk in enumerate(instance.disks):
5584
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5585
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5586
                                   " cannot copy" % idx, errors.ECODE_STATE)
5587

    
5588
    _CheckNodeOnline(self, target_node)
5589
    _CheckNodeNotDrained(self, target_node)
5590

    
5591
    if instance.admin_up:
5592
      # check memory requirements on the secondary node
5593
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5594
                           instance.name, bep[constants.BE_MEMORY],
5595
                           instance.hypervisor)
5596
    else:
5597
      self.LogInfo("Not checking memory on the secondary node as"
5598
                   " instance will not be started")
5599

    
5600
    # check bridge existance
5601
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5602

    
5603
  def Exec(self, feedback_fn):
5604
    """Move an instance.
5605

5606
    The move is done by shutting it down on its present node, copying
5607
    the data over (slow) and starting it on the new node.
5608

5609
    """
5610
    instance = self.instance
5611

    
5612
    source_node = instance.primary_node
5613
    target_node = self.target_node
5614

    
5615
    self.LogInfo("Shutting down instance %s on source node %s",
5616
                 instance.name, source_node)
5617

    
5618
    result = self.rpc.call_instance_shutdown(source_node, instance,
5619
                                             self.op.shutdown_timeout)
5620
    msg = result.fail_msg
5621
    if msg:
5622
      if self.op.ignore_consistency:
5623
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5624
                             " Proceeding anyway. Please make sure node"
5625
                             " %s is down. Error details: %s",
5626
                             instance.name, source_node, source_node, msg)
5627
      else:
5628
        raise errors.OpExecError("Could not shutdown instance %s on"
5629
                                 " node %s: %s" %
5630
                                 (instance.name, source_node, msg))
5631

    
5632
    # create the target disks
5633
    try:
5634
      _CreateDisks(self, instance, target_node=target_node)
5635
    except errors.OpExecError:
5636
      self.LogWarning("Device creation failed, reverting...")
5637
      try:
5638
        _RemoveDisks(self, instance, target_node=target_node)
5639
      finally:
5640
        self.cfg.ReleaseDRBDMinors(instance.name)
5641
        raise
5642

    
5643
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5644

    
5645
    errs = []
5646
    # activate, get path, copy the data over
5647
    for idx, disk in enumerate(instance.disks):
5648
      self.LogInfo("Copying data for disk %d", idx)
5649
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5650
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("live", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
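    # Each pass keeps the lowest completion percentage reported by any node,
    # so the progress shown above is the most pessimistic estimate across
    # both DRBD peers; polling repeats every two seconds until all are done.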

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
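    # nodes_ip maps each node name to its secondary IP; these addresses are
    # what the DRBD disk RPCs and the migration RPCs above receive as the
    # inter-node endpoints.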

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
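
# Note: _CreateBlockDev recurses into the children before creating the device
# itself, so for e.g. a DRBD8 disk the underlying data and metadata volumes
# already exist by the time _CreateSingleBlockDev runs for the DRBD device.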


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
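
# Illustrative example for _GenerateUniqueNames (the exact ID format comes
# from the cluster config): exts=[".disk0", ".disk1"] yields something like
# ["<unique-id-1>.disk0", "<unique-id-2>.disk1"], one fresh ID per extension.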


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
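
# The tree returned by _GenerateDRBD8Branch is a DRBD8 device of the requested
# size with two LV children: names[0] holds the data (size MB) and names[1]
# the DRBD metadata (128 MB); the endpoints, minors and shared secret are
# stored in the logical_id.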


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
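
# Example for _GenerateDiskTemplate: with template DT_DRBD8, base_index=0 and
# two entries in disk_info, this returns two DRBD8 devices named "disk/0" and
# "disk/1", each backed by a "<id>.diskN_data"/"<id>.diskN_meta" LV pair and
# one (primary, secondary) minor pair allocated above.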


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
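
# Example (hypothetical instance name): for an instance called
# "inst1.example.com", _GetInstanceInfoText returns
# "originstname+inst1.example.com", which ends up as an LVM tag on the disks.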


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
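
# Worked example for _ComputeDiskSize: two disks of 1024 MB each need 2048 MB
# of volume group space under DT_PLAIN and (1024 + 128) * 2 = 2304 MB under
# DT_DRBD8 (128 MB of DRBD metadata per disk); DT_DISKLESS and DT_FILE need
# no volume group space, hence None.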


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, _TBool),
    ("wait_for_sync", True, _TBool),
    ("ip_check", True, _TBool),
    ("name_check", True, _TBool),
    ("disks", _NoDefault, _TListOf(_TDict)),
    ("nics", _NoDefault, _TListOf(_TDict)),
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ("osparams", _EmptyDict, _TDict),
    ("no_install", None, _TMaybeBool),
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ("source_handshake", None, _TOr(_TList, _TNone)),
    ("source_x509_ca", None, _TOr(_TList, _TNone)),
    ("source_instance_name", None, _TMaybeString),
    ("src_node", None, _TMaybeString),
    ("src_path", None, _TMaybeString),
    ("pnode", None, _TMaybeString),
    ("snode", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("hypervisor", None, _TMaybeString),
    ("disk_template", _NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, _TBool),
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, _TMaybeString),
    ("dry_run", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
6735
    """Reads the export information from disk.
6736

6737
    It will override the opcode source node and path with the actual
6738
    information, if these two were not specified before.
6739

6740
    @return: the export information
6741

6742
    """
6743
    assert self.op.mode == constants.INSTANCE_IMPORT
6744

    
6745
    src_node = self.op.src_node
6746
    src_path = self.op.src_path
6747

    
6748
    if src_node is None:
6749
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6750
      exp_list = self.rpc.call_export_list(locked_nodes)
6751
      found = False
6752
      for node in exp_list:
6753
        if exp_list[node].fail_msg:
6754
          continue
6755
        if src_path in exp_list[node].payload:
6756
          found = True
6757
          self.op.src_node = src_node = node
6758
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6759
                                                       src_path)
6760
          break
6761
      if not found:
6762
        raise errors.OpPrereqError("No export found for relative path %s" %
6763
                                    src_path, errors.ECODE_INVAL)
6764

    
6765
    _CheckNodeOnline(self, src_node)
6766
    result = self.rpc.call_export_info(src_node, src_path)
6767
    result.Raise("No export or invalid export found in dir %s" % src_path)
6768

    
6769
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6770
    if not export_info.has_section(constants.INISECT_EXP):
6771
      raise errors.ProgrammerError("Corrupted export config",
6772
                                   errors.ECODE_ENVIRON)
6773

    
6774
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6775
    if (int(ei_version) != constants.EXPORT_VERSION):
6776
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6777
                                 (ei_version, constants.EXPORT_VERSION),
6778
                                 errors.ECODE_ENVIRON)
6779
    return export_info
6780

    
6781
  def _ReadExportParams(self, einfo):
6782
    """Use export parameters as defaults.
6783

6784
    In case the opcode doesn't specify (as in override) some instance
6785
    parameters, then try to use them from the export information, if
6786
    that declares them.
6787

6788
    """
6789
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6790

    
6791
    if self.op.disk_template is None:
6792
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6793
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6794
                                          "disk_template")
6795
      else:
6796
        raise errors.OpPrereqError("No disk template specified and the export"
6797
                                   " is missing the disk_template information",
6798
                                   errors.ECODE_INVAL)
6799

    
6800
    if not self.op.disks:
6801
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6802
        disks = []
6803
        # TODO: import the disk iv_name too
6804
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6805
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6806
          disks.append({"size": disk_sz})
6807
        self.op.disks = disks
6808
      else:
6809
        raise errors.OpPrereqError("No disk info specified and the export"
6810
                                   " is missing the disk information",
6811
                                   errors.ECODE_INVAL)
6812

    
6813
    if (not self.op.nics and
6814
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6815
      nics = []
6816
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6817
        ndict = {}
6818
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6819
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6820
          ndict[name] = v
6821
        nics.append(ndict)
6822
      self.op.nics = nics
6823

    
6824
    if (self.op.hypervisor is None and
6825
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6826
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6827
    if einfo.has_section(constants.INISECT_HYP):
6828
      # use the export parameters but do not override the ones
6829
      # specified by the user
6830
      for name, value in einfo.items(constants.INISECT_HYP):
6831
        if name not in self.op.hvparams:
6832
          self.op.hvparams[name] = value
6833

    
6834
    if einfo.has_section(constants.INISECT_BEP):
6835
      # use the parameters, without overriding
6836
      for name, value in einfo.items(constants.INISECT_BEP):
6837
        if name not in self.op.beparams:
6838
          self.op.beparams[name] = value
6839
    else:
6840
      # try to read the parameters old style, from the main section
6841
      for name in constants.BES_PARAMETERS:
6842
        if (name not in self.op.beparams and
6843
            einfo.has_option(constants.INISECT_INS, name)):
6844
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6845

    
6846
    if einfo.has_section(constants.INISECT_OSP):
6847
      # use the parameters, without overriding
6848
      for name, value in einfo.items(constants.INISECT_OSP):
6849
        if name not in self.op.osparams:
6850
          self.op.osparams[name] = value
6851

    
6852
  def _RevertToDefaults(self, cluster):
6853
    """Revert the instance parameters to the default values.
6854

6855
    """
6856
    # hvparams
6857
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6858
    for name in self.op.hvparams.keys():
6859
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6860
        del self.op.hvparams[name]
6861
    # beparams
6862
    be_defs = cluster.SimpleFillBE({})
6863
    for name in self.op.beparams.keys():
6864
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6865
        del self.op.beparams[name]
6866
    # nic params
6867
    nic_defs = cluster.SimpleFillNIC({})
6868
    for nic in self.op.nics:
6869
      for name in constants.NICS_PARAMETERS:
6870
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6871
          del nic[name]
6872
    # osparams
6873
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6874
    for name in self.op.osparams.keys():
6875
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6876
        del self.op.osparams[name]
6877

    
6878
  def CheckPrereq(self):
6879
    """Check prerequisites.
6880

6881
    """
6882
    if self.op.mode == constants.INSTANCE_IMPORT:
6883
      export_info = self._ReadExportInfo()
6884
      self._ReadExportParams(export_info)
6885

    
6886
    _CheckDiskTemplate(self.op.disk_template)
6887

    
6888
    if (not self.cfg.GetVGName() and
6889
        self.op.disk_template not in constants.DTS_NOT_LVM):
6890
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6891
                                 " instances", errors.ECODE_STATE)
6892

    
6893
    if self.op.hypervisor is None:
6894
      self.op.hypervisor = self.cfg.GetHypervisorType()
6895

    
6896
    cluster = self.cfg.GetClusterInfo()
6897
    enabled_hvs = cluster.enabled_hypervisors
6898
    if self.op.hypervisor not in enabled_hvs:
6899
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6900
                                 " cluster (%s)" % (self.op.hypervisor,
6901
                                  ",".join(enabled_hvs)),
6902
                                 errors.ECODE_STATE)
6903

    
6904
    # check hypervisor parameter syntax (locally)
6905
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6906
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6907
                                      self.op.hvparams)
6908
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6909
    hv_type.CheckParameterSyntax(filled_hvp)
6910
    self.hv_full = filled_hvp
6911
    # check that we don't specify global parameters on an instance
6912
    _CheckGlobalHvParams(self.op.hvparams)
6913

    
6914
    # fill and remember the beparams dict
6915
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6916
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6917

    
6918
    # build os parameters
6919
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6920

    
6921
    # now that hvp/bep are in final format, let's reset to defaults,
6922
    # if told to do so
6923
    if self.op.identify_defaults:
6924
      self._RevertToDefaults(cluster)
6925

    
6926
    # NIC buildup
6927
    self.nics = []
6928
    for idx, nic in enumerate(self.op.nics):
6929
      nic_mode_req = nic.get("mode", None)
6930
      nic_mode = nic_mode_req
6931
      if nic_mode is None:
6932
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6933

    
6934
      # in routed mode, for the first nic, the default ip is 'auto'
6935
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6936
        default_ip_mode = constants.VALUE_AUTO
6937
      else:
6938
        default_ip_mode = constants.VALUE_NONE
6939

    
6940
      # ip validity checks
6941
      ip = nic.get("ip", default_ip_mode)
6942
      if ip is None or ip.lower() == constants.VALUE_NONE:
6943
        nic_ip = None
6944
      elif ip.lower() == constants.VALUE_AUTO:
6945
        if not self.op.name_check:
6946
          raise errors.OpPrereqError("IP address set to auto but name checks"
6947
                                     " have been skipped. Aborting.",
6948
                                     errors.ECODE_INVAL)
6949
        nic_ip = self.hostname1.ip
6950
      else:
6951
        if not utils.IsValidIP4(ip):
6952
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6953
                                     " like a valid IP" % ip,
6954
                                     errors.ECODE_INVAL)
6955
        nic_ip = ip
6956

    
6957
      # TODO: check the ip address for uniqueness
6958
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6959
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6960
                                   errors.ECODE_INVAL)
6961

    
6962
      # MAC address verification
6963
      mac = nic.get("mac", constants.VALUE_AUTO)
6964
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6965
        mac = utils.NormalizeAndValidateMac(mac)
6966

    
6967
        try:
6968
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6969
        except errors.ReservationError:
6970
          raise errors.OpPrereqError("MAC address %s already in use"
6971
                                     " in cluster" % mac,
6972
                                     errors.ECODE_NOTUNIQUE)
6973

    
6974
      # bridge verification
6975
      bridge = nic.get("bridge", None)
6976
      link = nic.get("link", None)
6977
      if bridge and link:
6978
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6979
                                   " at the same time", errors.ECODE_INVAL)
6980
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6981
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6982
                                   errors.ECODE_INVAL)
6983
      elif bridge:
6984
        link = bridge
6985

    
6986
      nicparams = {}
6987
      if nic_mode_req:
6988
        nicparams[constants.NIC_MODE] = nic_mode_req
6989
      if link:
6990
        nicparams[constants.NIC_LINK] = link
6991

    
6992
      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
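      # each temporary disk still carries the freshly generated LV name as
      # its logical_id; remember that as the rename target, then point the
      # temporary disk at the existing (adopted) LV so the rename RPC can
      # find it on the node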
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
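      # keep the lock on the source node: it is still needed for the disk
      # transfers further down in the import path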
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
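      # wait briefly so the mirrors have a chance to connect before the
      # one-shot status check below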
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("early_release", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
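      # the iallocator may choose any node as the new secondary, so we have
      # to lock them all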
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified, all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
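    # allocate one new DRBD minor on the new node for every disk of the
    # instance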
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("ignore_consistency", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
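      # the node under repair is left out on purpose; only its peers are
      # required to have healthy disks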
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
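    # with an explicit target node, simply pair every secondary instance of
    # the evacuated nodes with it; otherwise ask the iallocator for a
    # multi-evacuation plan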
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", _NoDefault, _TInt),
    ("amount", _NoDefault, _TInt),
    ("wait_for_sync", True, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
8339
      self.cfg.SetDiskID(disk, node)
8340
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8341
      result.Raise("Grow request failed to node %s" % node)
8342

    
8343
      # TODO: Rewrite code to work properly
8344
      # DRBD goes into sync mode for a short amount of time after executing the
8345
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8346
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8347
      # time is a work-around.
8348
      time.sleep(5)
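      # A minimal sketch (not part of this module) of what the TODO above
      # could look like: poll the device instead of sleeping a fixed time,
      # assuming call_blockdev_find keeps returning a payload with
      # is_degraded, as used by LUQueryInstanceData._ComputeBlockdevStatus:
      #
      #   for _ in range(10):
      #     find_result = self.rpc.call_blockdev_find(node, disk)
      #     if not (find_result.payload and find_result.payload.is_degraded):
      #       break
      #     time.sleep(1)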
8349

    
8350
    disk.RecordGrow(self.op.amount)
8351
    self.cfg.Update(instance, feedback_fn)
8352
    if self.op.wait_for_sync:
8353
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8354
      if disk_abort:
8355
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8356
                             " status.\nPlease check the instance.")
8357
      if not instance.admin_up:
8358
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8359
    elif not instance.admin_up:
8360
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8361
                           " not supposed to be running because no wait for"
8362
                           " sync mode was requested.")
8363

    
8364

    
8365
class LUQueryInstanceData(NoHooksLU):
8366
  """Query runtime instance data.
8367

8368
  """
8369
  _OP_PARAMS = [
8370
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8371
    ("static", False, _TBool),
8372
    ]
8373
  REQ_BGL = False
8374

    
8375
  def ExpandNames(self):
8376
    self.needed_locks = {}
8377
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8378

    
8379
    if self.op.instances:
8380
      self.wanted_names = []
8381
      for name in self.op.instances:
8382
        full_name = _ExpandInstanceName(self.cfg, name)
8383
        self.wanted_names.append(full_name)
8384
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8385
    else:
8386
      self.wanted_names = None
8387
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8388

    
8389
    self.needed_locks[locking.LEVEL_NODE] = []
8390
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8391

    
8392
  def DeclareLocks(self, level):
8393
    if level == locking.LEVEL_NODE:
8394
      self._LockInstancesNodes()
8395

    
8396
  def CheckPrereq(self):
8397
    """Check prerequisites.
8398

8399
    This only checks the optional instance list against the existing names.
8400

8401
    """
8402
    if self.wanted_names is None:
8403
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8404

    
8405
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8406
                             in self.wanted_names]
8407

    
8408
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8409
    """Returns the status of a block device
8410

8411
    """
8412
    if self.op.static or not node:
8413
      return None
8414

    
8415
    self.cfg.SetDiskID(dev, node)
8416

    
8417
    result = self.rpc.call_blockdev_find(node, dev)
8418
    if result.offline:
8419
      return None
8420

    
8421
    result.Raise("Can't compute disk status for %s" % instance_name)
8422

    
8423
    status = result.payload
8424
    if status is None:
8425
      return None
8426

    
8427
    return (status.dev_path, status.major, status.minor,
8428
            status.sync_percent, status.estimated_time,
8429
            status.is_degraded, status.ldisk_status)
8430

    
8431
  def _ComputeDiskStatus(self, instance, snode, dev):
8432
    """Compute block device status.
8433

8434
    """
8435
    if dev.dev_type in constants.LDS_DRBD:
8436
      # we change the snode then (otherwise we use the one passed in)
8437
      if dev.logical_id[0] == instance.primary_node:
8438
        snode = dev.logical_id[1]
8439
      else:
8440
        snode = dev.logical_id[0]
8441

    
8442
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8443
                                              instance.name, dev)
8444
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8445

    
8446
    if dev.children:
8447
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8448
                      for child in dev.children]
8449
    else:
8450
      dev_children = []
8451

    
8452
    data = {
8453
      "iv_name": dev.iv_name,
8454
      "dev_type": dev.dev_type,
8455
      "logical_id": dev.logical_id,
8456
      "physical_id": dev.physical_id,
8457
      "pstatus": dev_pstatus,
8458
      "sstatus": dev_sstatus,
8459
      "children": dev_children,
8460
      "mode": dev.mode,
8461
      "size": dev.size,
8462
      }
8463

    
8464
    return data
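    # For reference, an entry built above looks roughly like this (values are
    # purely illustrative):
    #
    #   {"iv_name": "disk/0", "dev_type": "lvm", "logical_id": (...),
    #    "physical_id": (...), "pstatus": (dev_path, major, minor,
    #    sync_percent, estimated_time, is_degraded, ldisk_status),
    #    "sstatus": None, "children": [], "mode": "rw", "size": 10240}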
8465

    
8466
  def Exec(self, feedback_fn):
8467
    """Gather and return data"""
8468
    result = {}
8469

    
8470
    cluster = self.cfg.GetClusterInfo()
8471

    
8472
    for instance in self.wanted_instances:
8473
      if not self.op.static:
8474
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8475
                                                  instance.name,
8476
                                                  instance.hypervisor)
8477
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8478
        remote_info = remote_info.payload
8479
        if remote_info and "state" in remote_info:
8480
          remote_state = "up"
8481
        else:
8482
          remote_state = "down"
8483
      else:
8484
        remote_state = None
8485
      if instance.admin_up:
8486
        config_state = "up"
8487
      else:
8488
        config_state = "down"
8489

    
8490
      disks = [self._ComputeDiskStatus(instance, None, device)
8491
               for device in instance.disks]
8492

    
8493
      idict = {
8494
        "name": instance.name,
8495
        "config_state": config_state,
8496
        "run_state": remote_state,
8497
        "pnode": instance.primary_node,
8498
        "snodes": instance.secondary_nodes,
8499
        "os": instance.os,
8500
        # this happens to be the same format used for hooks
8501
        "nics": _NICListToTuple(self, instance.nics),
8502
        "disk_template": instance.disk_template,
8503
        "disks": disks,
8504
        "hypervisor": instance.hypervisor,
8505
        "network_port": instance.network_port,
8506
        "hv_instance": instance.hvparams,
8507
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8508
        "be_instance": instance.beparams,
8509
        "be_actual": cluster.FillBE(instance),
8510
        "os_instance": instance.osparams,
8511
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8512
        "serial_no": instance.serial_no,
8513
        "mtime": instance.mtime,
8514
        "ctime": instance.ctime,
8515
        "uuid": instance.uuid,
8516
        }
8517

    
8518
      result[instance.name] = idict
8519

    
8520
    return result
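    # Illustrative return value (names made up): instance names mapped to the
    # per-instance dict built above, e.g.
    #
    #   {"inst1.example.com": {"name": "inst1.example.com",
    #                          "config_state": "up", "run_state": "up",
    #                          "pnode": "node1.example.com", ...}}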
8521

    
8522

    
8523
class LUSetInstanceParams(LogicalUnit):
8524
  """Modifies an instances's parameters.
8525

8526
  """
8527
  HPATH = "instance-modify"
8528
  HTYPE = constants.HTYPE_INSTANCE
8529
  _OP_PARAMS = [
8530
    _PInstanceName,
8531
    ("nics", _EmptyList, _TList),
8532
    ("disks", _EmptyList, _TList),
8533
    ("beparams", _EmptyDict, _TDict),
8534
    ("hvparams", _EmptyDict, _TDict),
8535
    ("disk_template", None, _TMaybeString),
8536
    ("remote_node", None, _TMaybeString),
8537
    ("os_name", None, _TMaybeString),
8538
    ("force_variant", False, _TBool),
8539
    ("osparams", None, _TOr(_TDict, _TNone)),
8540
    _PForce,
8541
    ]
8542
  REQ_BGL = False
8543

    
8544
  def CheckArguments(self):
8545
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8546
            self.op.hvparams or self.op.beparams or self.op.os_name):
8547
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8548

    
8549
    if self.op.hvparams:
8550
      _CheckGlobalHvParams(self.op.hvparams)
8551

    
8552
    # Disk validation
8553
    disk_addremove = 0
8554
    for disk_op, disk_dict in self.op.disks:
8555
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8556
      if disk_op == constants.DDM_REMOVE:
8557
        disk_addremove += 1
8558
        continue
8559
      elif disk_op == constants.DDM_ADD:
8560
        disk_addremove += 1
8561
      else:
8562
        if not isinstance(disk_op, int):
8563
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8564
        if not isinstance(disk_dict, dict):
8565
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8566
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8567

    
8568
      if disk_op == constants.DDM_ADD:
8569
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8570
        if mode not in constants.DISK_ACCESS_SET:
8571
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8572
                                     errors.ECODE_INVAL)
8573
        size = disk_dict.get('size', None)
8574
        if size is None:
8575
          raise errors.OpPrereqError("Required disk parameter size missing",
8576
                                     errors.ECODE_INVAL)
8577
        try:
8578
          size = int(size)
8579
        except (TypeError, ValueError), err:
8580
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8581
                                     str(err), errors.ECODE_INVAL)
8582
        disk_dict['size'] = size
8583
      else:
8584
        # modification of disk
8585
        if 'size' in disk_dict:
8586
          raise errors.OpPrereqError("Disk size change not possible, use"
8587
                                     " grow-disk", errors.ECODE_INVAL)
8588

    
8589
    if disk_addremove > 1:
8590
      raise errors.OpPrereqError("Only one disk add or remove operation"
8591
                                 " supported at a time", errors.ECODE_INVAL)
8592

    
8593
    if self.op.disks and self.op.disk_template is not None:
8594
      raise errors.OpPrereqError("Disk template conversion and other disk"
8595
                                 " changes not supported at the same time",
8596
                                 errors.ECODE_INVAL)
8597

    
8598
    if self.op.disk_template:
8599
      _CheckDiskTemplate(self.op.disk_template)
8600
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8601
          self.op.remote_node is None):
8602
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8603
                                   " one requires specifying a secondary node",
8604
                                   errors.ECODE_INVAL)
8605

    
8606
    # NIC validation
8607
    nic_addremove = 0
8608
    for nic_op, nic_dict in self.op.nics:
8609
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8610
      if nic_op == constants.DDM_REMOVE:
8611
        nic_addremove += 1
8612
        continue
8613
      elif nic_op == constants.DDM_ADD:
8614
        nic_addremove += 1
8615
      else:
8616
        if not isinstance(nic_op, int):
8617
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8618
        if not isinstance(nic_dict, dict):
8619
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8620
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8621

    
8622
      # nic_dict should be a dict
8623
      nic_ip = nic_dict.get('ip', None)
8624
      if nic_ip is not None:
8625
        if nic_ip.lower() == constants.VALUE_NONE:
8626
          nic_dict['ip'] = None
8627
        else:
8628
          if not utils.IsValidIP4(nic_ip):
8629
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8630
                                       errors.ECODE_INVAL)
8631

    
8632
      nic_bridge = nic_dict.get('bridge', None)
8633
      nic_link = nic_dict.get('link', None)
8634
      if nic_bridge and nic_link:
8635
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8636
                                   " at the same time", errors.ECODE_INVAL)
8637
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8638
        nic_dict['bridge'] = None
8639
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8640
        nic_dict['link'] = None
8641

    
8642
      if nic_op == constants.DDM_ADD:
8643
        nic_mac = nic_dict.get('mac', None)
8644
        if nic_mac is None:
8645
          nic_dict['mac'] = constants.VALUE_AUTO
8646

    
8647
      if 'mac' in nic_dict:
8648
        nic_mac = nic_dict['mac']
8649
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8650
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8651

    
8652
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8653
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8654
                                     " modifying an existing nic",
8655
                                     errors.ECODE_INVAL)
8656

    
8657
    if nic_addremove > 1:
8658
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8659
                                 " supported at a time", errors.ECODE_INVAL)
8660

    
8661
  def ExpandNames(self):
8662
    self._ExpandAndLockInstance()
8663
    self.needed_locks[locking.LEVEL_NODE] = []
8664
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8665

    
8666
  def DeclareLocks(self, level):
8667
    if level == locking.LEVEL_NODE:
8668
      self._LockInstancesNodes()
8669
      if self.op.disk_template and self.op.remote_node:
8670
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8671
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8672

    
8673
  def BuildHooksEnv(self):
8674
    """Build hooks env.
8675

8676
    This runs on the master, primary and secondaries.
8677

8678
    """
8679
    args = dict()
8680
    if constants.BE_MEMORY in self.be_new:
8681
      args['memory'] = self.be_new[constants.BE_MEMORY]
8682
    if constants.BE_VCPUS in self.be_new:
8683
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8684
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8685
    # information at all.
8686
    if self.op.nics:
8687
      args['nics'] = []
8688
      nic_override = dict(self.op.nics)
8689
      for idx, nic in enumerate(self.instance.nics):
8690
        if idx in nic_override:
8691
          this_nic_override = nic_override[idx]
8692
        else:
8693
          this_nic_override = {}
8694
        if 'ip' in this_nic_override:
8695
          ip = this_nic_override['ip']
8696
        else:
8697
          ip = nic.ip
8698
        if 'mac' in this_nic_override:
8699
          mac = this_nic_override['mac']
8700
        else:
8701
          mac = nic.mac
8702
        if idx in self.nic_pnew:
8703
          nicparams = self.nic_pnew[idx]
8704
        else:
8705
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8706
        mode = nicparams[constants.NIC_MODE]
8707
        link = nicparams[constants.NIC_LINK]
8708
        args['nics'].append((ip, mac, mode, link))
8709
      if constants.DDM_ADD in nic_override:
8710
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8711
        mac = nic_override[constants.DDM_ADD]['mac']
8712
        nicparams = self.nic_pnew[constants.DDM_ADD]
8713
        mode = nicparams[constants.NIC_MODE]
8714
        link = nicparams[constants.NIC_LINK]
8715
        args['nics'].append((ip, mac, mode, link))
8716
      elif constants.DDM_REMOVE in nic_override:
8717
        del args['nics'][-1]
8718

    
8719
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8720
    if self.op.disk_template:
8721
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8722
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8723
    return env, nl, nl
8724

    
8725
  def CheckPrereq(self):
8726
    """Check prerequisites.
8727

8728
    This only checks the instance list against the existing names.
8729

8730
    """
8731
    # checking the new params on the primary/secondary nodes
8732

    
8733
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8734
    cluster = self.cluster = self.cfg.GetClusterInfo()
8735
    assert self.instance is not None, \
8736
      "Cannot retrieve locked instance %s" % self.op.instance_name
8737
    pnode = instance.primary_node
8738
    nodelist = list(instance.all_nodes)
8739

    
8740
    # OS change
8741
    if self.op.os_name and not self.op.force:
8742
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8743
                      self.op.force_variant)
8744
      instance_os = self.op.os_name
8745
    else:
8746
      instance_os = instance.os
8747

    
8748
    if self.op.disk_template:
8749
      if instance.disk_template == self.op.disk_template:
8750
        raise errors.OpPrereqError("Instance already has disk template %s" %
8751
                                   instance.disk_template, errors.ECODE_INVAL)
8752

    
8753
      if (instance.disk_template,
8754
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8755
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8756
                                   " %s to %s" % (instance.disk_template,
8757
                                                  self.op.disk_template),
8758
                                   errors.ECODE_INVAL)
8759
      _CheckInstanceDown(self, instance, "cannot change disk template")
8760
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8761
        _CheckNodeOnline(self, self.op.remote_node)
8762
        _CheckNodeNotDrained(self, self.op.remote_node)
8763
        disks = [{"size": d.size} for d in instance.disks]
8764
        required = _ComputeDiskSize(self.op.disk_template, disks)
8765
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8766

    
8767
    # hvparams processing
8768
    if self.op.hvparams:
8769
      hv_type = instance.hypervisor
8770
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8771
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8772
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8773

    
8774
      # local check
8775
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8776
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8777
      self.hv_new = hv_new # the new actual values
8778
      self.hv_inst = i_hvdict # the new dict (without defaults)
8779
    else:
8780
      self.hv_new = self.hv_inst = {}
8781

    
8782
    # beparams processing
8783
    if self.op.beparams:
8784
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8785
                                   use_none=True)
8786
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8787
      be_new = cluster.SimpleFillBE(i_bedict)
8788
      self.be_new = be_new # the new actual values
8789
      self.be_inst = i_bedict # the new dict (without defaults)
8790
    else:
8791
      self.be_new = self.be_inst = {}
8792

    
8793
    # osparams processing
8794
    if self.op.osparams:
8795
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8796
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8797
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8798
      self.os_inst = i_osdict # the new dict (without defaults)
8799
    else:
8800
      self.os_new = self.os_inst = {}
8801

    
8802
    self.warn = []
8803

    
8804
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8805
      mem_check_list = [pnode]
8806
      if be_new[constants.BE_AUTO_BALANCE]:
8807
        # either we changed auto_balance to yes or it was from before
8808
        mem_check_list.extend(instance.secondary_nodes)
8809
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8810
                                                  instance.hypervisor)
8811
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8812
                                         instance.hypervisor)
8813
      pninfo = nodeinfo[pnode]
8814
      msg = pninfo.fail_msg
8815
      if msg:
8816
        # Assume the primary node is unreachable and go ahead
8817
        self.warn.append("Can't get info from primary node %s: %s" %
8818
                         (pnode, msg))
8819
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8820
        self.warn.append("Node data from primary node %s doesn't contain"
8821
                         " free memory information" % pnode)
8822
      elif instance_info.fail_msg:
8823
        self.warn.append("Can't get instance runtime information: %s" %
8824
                        instance_info.fail_msg)
8825
      else:
8826
        if instance_info.payload:
8827
          current_mem = int(instance_info.payload['memory'])
8828
        else:
8829
          # Assume instance not running
8830
          # (there is a slight race condition here, but it's not very probable,
8831
          # and we have no other way to check)
8832
          current_mem = 0
8833
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8834
                    pninfo.payload['memory_free'])
8835
        if miss_mem > 0:
8836
          raise errors.OpPrereqError("This change will prevent the instance"
8837
                                     " from starting, due to %d MB of memory"
8838
                                     " missing on its primary node" % miss_mem,
8839
                                     errors.ECODE_NORES)
8840

    
8841
      if be_new[constants.BE_AUTO_BALANCE]:
8842
        for node, nres in nodeinfo.items():
8843
          if node not in instance.secondary_nodes:
8844
            continue
8845
          msg = nres.fail_msg
8846
          if msg:
8847
            self.warn.append("Can't get info from secondary node %s: %s" %
8848
                             (node, msg))
8849
          elif not isinstance(nres.payload.get('memory_free', None), int):
8850
            self.warn.append("Secondary node %s didn't return free"
8851
                             " memory information" % node)
8852
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8853
            self.warn.append("Not enough memory to failover instance to"
8854
                             " secondary node %s" % node)
8855

    
8856
    # NIC processing
8857
    self.nic_pnew = {}
8858
    self.nic_pinst = {}
8859
    for nic_op, nic_dict in self.op.nics:
8860
      if nic_op == constants.DDM_REMOVE:
8861
        if not instance.nics:
8862
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8863
                                     errors.ECODE_INVAL)
8864
        continue
8865
      if nic_op != constants.DDM_ADD:
8866
        # an existing nic
8867
        if not instance.nics:
8868
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8869
                                     " no NICs" % nic_op,
8870
                                     errors.ECODE_INVAL)
8871
        if nic_op < 0 or nic_op >= len(instance.nics):
8872
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8873
                                     " are 0 to %d" %
8874
                                     (nic_op, len(instance.nics) - 1),
8875
                                     errors.ECODE_INVAL)
8876
        old_nic_params = instance.nics[nic_op].nicparams
8877
        old_nic_ip = instance.nics[nic_op].ip
8878
      else:
8879
        old_nic_params = {}
8880
        old_nic_ip = None
8881

    
8882
      update_params_dict = dict([(key, nic_dict[key])
8883
                                 for key in constants.NICS_PARAMETERS
8884
                                 if key in nic_dict])
8885

    
8886
      if 'bridge' in nic_dict:
8887
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8888

    
8889
      new_nic_params = _GetUpdatedParams(old_nic_params,
8890
                                         update_params_dict)
8891
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8892
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8893
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8894
      self.nic_pinst[nic_op] = new_nic_params
8895
      self.nic_pnew[nic_op] = new_filled_nic_params
8896
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8897

    
8898
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8899
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8900
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8901
        if msg:
8902
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8903
          if self.op.force:
8904
            self.warn.append(msg)
8905
          else:
8906
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8907
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8908
        if 'ip' in nic_dict:
8909
          nic_ip = nic_dict['ip']
8910
        else:
8911
          nic_ip = old_nic_ip
8912
        if nic_ip is None:
8913
          raise errors.OpPrereqError("Cannot set the nic ip to None"
8914
                                     " on a routed nic", errors.ECODE_INVAL)
8915
      if 'mac' in nic_dict:
8916
        nic_mac = nic_dict['mac']
8917
        if nic_mac is None:
8918
          raise errors.OpPrereqError("Cannot set the nic mac to None",
8919
                                     errors.ECODE_INVAL)
8920
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8921
          # otherwise generate the mac
8922
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8923
        else:
8924
          # or validate/reserve the current one
8925
          try:
8926
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8927
          except errors.ReservationError:
8928
            raise errors.OpPrereqError("MAC address %s already in use"
8929
                                       " in cluster" % nic_mac,
8930
                                       errors.ECODE_NOTUNIQUE)
8931

    
8932
    # DISK processing
8933
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8934
      raise errors.OpPrereqError("Disk operations not supported for"
8935
                                 " diskless instances",
8936
                                 errors.ECODE_INVAL)
8937
    for disk_op, _ in self.op.disks:
8938
      if disk_op == constants.DDM_REMOVE:
8939
        if len(instance.disks) == 1:
8940
          raise errors.OpPrereqError("Cannot remove the last disk of"
8941
                                     " an instance", errors.ECODE_INVAL)
8942
        _CheckInstanceDown(self, instance, "cannot remove disks")
8943

    
8944
      if (disk_op == constants.DDM_ADD and
8945
          len(instance.nics) >= constants.MAX_DISKS):
8946
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8947
                                   " add more" % constants.MAX_DISKS,
8948
                                   errors.ECODE_STATE)
8949
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8950
        # an existing disk
8951
        if disk_op < 0 or disk_op >= len(instance.disks):
8952
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8953
                                     " are 0 to %d" %
8954
                                     (disk_op, len(instance.disks)),
8955
                                     errors.ECODE_INVAL)
8956

    
8957
    return
8958

    
8959
  def _ConvertPlainToDrbd(self, feedback_fn):
8960
    """Converts an instance from plain to drbd.
8961

8962
    """
8963
    feedback_fn("Converting template to drbd")
8964
    instance = self.instance
8965
    pnode = instance.primary_node
8966
    snode = self.op.remote_node
8967

    
8968
    # create a fake disk info for _GenerateDiskTemplate
8969
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8970
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8971
                                      instance.name, pnode, [snode],
8972
                                      disk_info, None, None, 0)
8973
    info = _GetInstanceInfoText(instance)
8974
    feedback_fn("Creating aditional volumes...")
8975
    # first, create the missing data and meta devices
8976
    for disk in new_disks:
8977
      # unfortunately this is... not too nice
8978
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8979
                            info, True)
8980
      for child in disk.children:
8981
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8982
    # at this stage, all new LVs have been created, we can rename the
8983
    # old ones
8984
    feedback_fn("Renaming original volumes...")
8985
    rename_list = [(o, n.children[0].logical_id)
8986
                   for (o, n) in zip(instance.disks, new_disks)]
8987
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8988
    result.Raise("Failed to rename original LVs")
8989

    
8990
    feedback_fn("Initializing DRBD devices...")
8991
    # all child devices are in place, we can now create the DRBD devices
8992
    for disk in new_disks:
8993
      for node in [pnode, snode]:
8994
        f_create = node == pnode
8995
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8996

    
8997
    # at this point, the instance has been modified
8998
    instance.disk_template = constants.DT_DRBD8
8999
    instance.disks = new_disks
9000
    self.cfg.Update(instance, feedback_fn)
9001

    
9002
    # disks are created, waiting for sync
9003
    disk_abort = not _WaitForSync(self, instance)
9004
    if disk_abort:
9005
      raise errors.OpExecError("There are some degraded disks for"
9006
                               " this instance, please cleanup manually")
9007

    
9008
  def _ConvertDrbdToPlain(self, feedback_fn):
9009
    """Converts an instance from drbd to plain.
9010

9011
    """
9012
    instance = self.instance
9013
    assert len(instance.secondary_nodes) == 1
9014
    pnode = instance.primary_node
9015
    snode = instance.secondary_nodes[0]
9016
    feedback_fn("Converting template to plain")
9017

    
9018
    old_disks = instance.disks
9019
    new_disks = [d.children[0] for d in old_disks]
9020

    
9021
    # copy over size and mode
9022
    for parent, child in zip(old_disks, new_disks):
9023
      child.size = parent.size
9024
      child.mode = parent.mode
9025

    
9026
    # update instance structure
9027
    instance.disks = new_disks
9028
    instance.disk_template = constants.DT_PLAIN
9029
    self.cfg.Update(instance, feedback_fn)
9030

    
9031
    feedback_fn("Removing volumes on the secondary node...")
9032
    for disk in old_disks:
9033
      self.cfg.SetDiskID(disk, snode)
9034
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9035
      if msg:
9036
        self.LogWarning("Could not remove block device %s on node %s,"
9037
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9038

    
9039
    feedback_fn("Removing unneeded volumes on the primary node...")
9040
    for idx, disk in enumerate(old_disks):
9041
      meta = disk.children[1]
9042
      self.cfg.SetDiskID(meta, pnode)
9043
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9044
      if msg:
9045
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9046
                        " continuing anyway: %s", idx, pnode, msg)
9047

    
9048

    
9049
  def Exec(self, feedback_fn):
9050
    """Modifies an instance.
9051

9052
    All parameters take effect only at the next restart of the instance.
9053

9054
    """
9055
    # Process here the warnings from CheckPrereq, as we don't have a
9056
    # feedback_fn there.
9057
    for warn in self.warn:
9058
      feedback_fn("WARNING: %s" % warn)
9059

    
9060
    result = []
9061
    instance = self.instance
9062
    # disk changes
9063
    for disk_op, disk_dict in self.op.disks:
9064
      if disk_op == constants.DDM_REMOVE:
9065
        # remove the last disk
9066
        device = instance.disks.pop()
9067
        device_idx = len(instance.disks)
9068
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9069
          self.cfg.SetDiskID(disk, node)
9070
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9071
          if msg:
9072
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9073
                            " continuing anyway", device_idx, node, msg)
9074
        result.append(("disk/%d" % device_idx, "remove"))
9075
      elif disk_op == constants.DDM_ADD:
9076
        # add a new disk
9077
        if instance.disk_template == constants.DT_FILE:
9078
          file_driver, file_path = instance.disks[0].logical_id
9079
          file_path = os.path.dirname(file_path)
9080
        else:
9081
          file_driver = file_path = None
9082
        disk_idx_base = len(instance.disks)
9083
        new_disk = _GenerateDiskTemplate(self,
9084
                                         instance.disk_template,
9085
                                         instance.name, instance.primary_node,
9086
                                         instance.secondary_nodes,
9087
                                         [disk_dict],
9088
                                         file_path,
9089
                                         file_driver,
9090
                                         disk_idx_base)[0]
9091
        instance.disks.append(new_disk)
9092
        info = _GetInstanceInfoText(instance)
9093

    
9094
        logging.info("Creating volume %s for instance %s",
9095
                     new_disk.iv_name, instance.name)
9096
        # Note: this needs to be kept in sync with _CreateDisks
9097
        #HARDCODE
9098
        for node in instance.all_nodes:
9099
          f_create = node == instance.primary_node
9100
          try:
9101
            _CreateBlockDev(self, node, instance, new_disk,
9102
                            f_create, info, f_create)
9103
          except errors.OpExecError, err:
9104
            self.LogWarning("Failed to create volume %s (%s) on"
9105
                            " node %s: %s",
9106
                            new_disk.iv_name, new_disk, node, err)
9107
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9108
                       (new_disk.size, new_disk.mode)))
9109
      else:
9110
        # change a given disk
9111
        instance.disks[disk_op].mode = disk_dict['mode']
9112
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9113

    
9114
    if self.op.disk_template:
9115
      r_shut = _ShutdownInstanceDisks(self, instance)
9116
      if not r_shut:
9117
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9118
                                 " proceed with disk template conversion")
9119
      mode = (instance.disk_template, self.op.disk_template)
9120
      try:
9121
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9122
      except:
9123
        self.cfg.ReleaseDRBDMinors(instance.name)
9124
        raise
9125
      result.append(("disk_template", self.op.disk_template))
9126

    
9127
    # NIC changes
9128
    for nic_op, nic_dict in self.op.nics:
9129
      if nic_op == constants.DDM_REMOVE:
9130
        # remove the last nic
9131
        del instance.nics[-1]
9132
        result.append(("nic.%d" % len(instance.nics), "remove"))
9133
      elif nic_op == constants.DDM_ADD:
9134
        # mac and bridge should be set, by now
9135
        mac = nic_dict['mac']
9136
        ip = nic_dict.get('ip', None)
9137
        nicparams = self.nic_pinst[constants.DDM_ADD]
9138
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9139
        instance.nics.append(new_nic)
9140
        result.append(("nic.%d" % (len(instance.nics) - 1),
9141
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9142
                       (new_nic.mac, new_nic.ip,
9143
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9144
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9145
                       )))
9146
      else:
9147
        for key in 'mac', 'ip':
9148
          if key in nic_dict:
9149
            setattr(instance.nics[nic_op], key, nic_dict[key])
9150
        if nic_op in self.nic_pinst:
9151
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9152
        for key, val in nic_dict.iteritems():
9153
          result.append(("nic.%s/%d" % (key, nic_op), val))
9154

    
9155
    # hvparams changes
9156
    if self.op.hvparams:
9157
      instance.hvparams = self.hv_inst
9158
      for key, val in self.op.hvparams.iteritems():
9159
        result.append(("hv/%s" % key, val))
9160

    
9161
    # beparams changes
9162
    if self.op.beparams:
9163
      instance.beparams = self.be_inst
9164
      for key, val in self.op.beparams.iteritems():
9165
        result.append(("be/%s" % key, val))
9166

    
9167
    # OS change
9168
    if self.op.os_name:
9169
      instance.os = self.op.os_name
9170

    
9171
    # osparams changes
9172
    if self.op.osparams:
9173
      instance.osparams = self.os_inst
9174
      for key, val in self.op.osparams.iteritems():
9175
        result.append(("os/%s" % key, val))
9176

    
9177
    self.cfg.Update(instance, feedback_fn)
9178

    
9179
    return result
9180

    
9181
  _DISK_CONVERSIONS = {
9182
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9183
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9184
    }
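  # _DISK_CONVERSIONS maps an (old_template, new_template) pair to the helper
  # performing the conversion; Exec looks the pair up and invokes it.
  # Illustrative usage sketch, not part of this module (assuming the opcode
  # class driving this LU is named OpSetInstanceParams):
  #
  #   op = opcodes.OpSetInstanceParams(instance_name="inst1.example.com",
  #                                    disk_template=constants.DT_DRBD8,
  #                                    remote_node="node2.example.com")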
9185

    
9186

    
9187
class LUQueryExports(NoHooksLU):
9188
  """Query the exports list
9189

9190
  """
9191
  _OP_PARAMS = [
9192
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9193
    ("use_locking", False, _TBool),
9194
    ]
9195
  REQ_BGL = False
9196

    
9197
  def ExpandNames(self):
9198
    self.needed_locks = {}
9199
    self.share_locks[locking.LEVEL_NODE] = 1
9200
    if not self.op.nodes:
9201
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9202
    else:
9203
      self.needed_locks[locking.LEVEL_NODE] = \
9204
        _GetWantedNodes(self, self.op.nodes)
9205

    
9206
  def Exec(self, feedback_fn):
9207
    """Compute the list of all the exported system images.
9208

9209
    @rtype: dict
9210
    @return: a dictionary with the structure node->(export-list)
9211
        where export-list is a list of the instances exported on
9212
        that node.
9213

9214
    """
9215
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9216
    rpcresult = self.rpc.call_export_list(self.nodes)
9217
    result = {}
9218
    for node in rpcresult:
9219
      if rpcresult[node].fail_msg:
9220
        result[node] = False
9221
      else:
9222
        result[node] = rpcresult[node].payload
9223

    
9224
    return result
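    # Illustrative return value (names made up): each node maps either to its
    # export list or to False when querying that node failed, e.g.
    #
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": False}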
9225

    
9226

    
9227
class LUPrepareExport(NoHooksLU):
9228
  """Prepares an instance for an export and returns useful information.
9229

9230
  """
9231
  _OP_PARAMS = [
9232
    _PInstanceName,
9233
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9234
    ]
9235
  REQ_BGL = False
9236

    
9237
  def ExpandNames(self):
9238
    self._ExpandAndLockInstance()
9239

    
9240
  def CheckPrereq(self):
9241
    """Check prerequisites.
9242

9243
    """
9244
    instance_name = self.op.instance_name
9245

    
9246
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9247
    assert self.instance is not None, \
9248
          "Cannot retrieve locked instance %s" % self.op.instance_name
9249
    _CheckNodeOnline(self, self.instance.primary_node)
9250

    
9251
    self._cds = _GetClusterDomainSecret()
9252

    
9253
  def Exec(self, feedback_fn):
9254
    """Prepares an instance for an export.
9255

9256
    """
9257
    instance = self.instance
9258

    
9259
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9260
      salt = utils.GenerateSecret(8)
9261

    
9262
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9263
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9264
                                              constants.RIE_CERT_VALIDITY)
9265
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9266

    
9267
      (name, cert_pem) = result.payload
9268

    
9269
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9270
                                             cert_pem)
9271

    
9272
      return {
9273
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9274
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9275
                          salt),
9276
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9277
        }
9278

    
9279
    return None
9280

    
9281

    
9282
class LUExportInstance(LogicalUnit):
9283
  """Export an instance to an image in the cluster.
9284

9285
  """
9286
  HPATH = "instance-export"
9287
  HTYPE = constants.HTYPE_INSTANCE
9288
  _OP_PARAMS = [
9289
    _PInstanceName,
9290
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9291
    ("shutdown", True, _TBool),
9292
    _PShutdownTimeout,
9293
    ("remove_instance", False, _TBool),
9294
    ("ignore_remove_failures", False, _TBool),
9295
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9296
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9297
    ("destination_x509_ca", None, _TMaybeString),
9298
    ]
9299
  REQ_BGL = False
9300

    
9301
  def CheckArguments(self):
9302
    """Check the arguments.
9303

9304
    """
9305
    self.x509_key_name = self.op.x509_key_name
9306
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9307

    
9308
    if self.op.remove_instance and not self.op.shutdown:
9309
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9310
                                 " down before")
9311

    
9312
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9313
      if not self.x509_key_name:
9314
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9315
                                   errors.ECODE_INVAL)
9316

    
9317
      if not self.dest_x509_ca_pem:
9318
        raise errors.OpPrereqError("Missing destination X509 CA",
9319
                                   errors.ECODE_INVAL)
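    # For reference (illustrative values): for remote exports, x509_key_name
    # is the (name, hmac, salt) triple returned by LUPrepareExport and
    # destination_x509_ca is a signed PEM blob, e.g.
    #
    #   x509_key_name=("x509-key-name", "<sha1-hmac-hex>", "<salt>")
    #   destination_x509_ca="-----BEGIN CERTIFICATE-----\n..."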
9320

    
9321
  def ExpandNames(self):
9322
    self._ExpandAndLockInstance()
9323

    
9324
    # Lock all nodes for local exports
9325
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9326
      # FIXME: lock only instance primary and destination node
9327
      #
9328
      # Sad but true, for now we have to lock all nodes, as we don't know where
9329
      # the previous export might be, and in this LU we search for it and
9330
      # remove it from its current node. In the future we could fix this by:
9331
      #  - making a tasklet to search (share-lock all), then create the
9332
      #    new one, then one to remove, after
9333
      #  - removing the removal operation altogether
9334
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9335

    
9336
  def DeclareLocks(self, level):
9337
    """Last minute lock declaration."""
9338
    # All nodes are locked anyway, so nothing to do here.
9339

    
9340
  def BuildHooksEnv(self):
9341
    """Build hooks env.
9342

9343
    This will run on the master, primary node and target node.
9344

9345
    """
9346
    env = {
9347
      "EXPORT_MODE": self.op.mode,
9348
      "EXPORT_NODE": self.op.target_node,
9349
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9350
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9351
      # TODO: Generic function for boolean env variables
9352
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9353
      }
9354

    
9355
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9356

    
9357
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9358

    
9359
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9360
      nl.append(self.op.target_node)
9361

    
9362
    return env, nl, nl
9363

    
9364
  def CheckPrereq(self):
9365
    """Check prerequisites.
9366

9367
    This checks that the instance and node names are valid.
9368

9369
    """
9370
    instance_name = self.op.instance_name
9371

    
9372
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9373
    assert self.instance is not None, \
9374
          "Cannot retrieve locked instance %s" % self.op.instance_name
9375
    _CheckNodeOnline(self, self.instance.primary_node)
9376

    
9377
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9378
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9379
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9380
      assert self.dst_node is not None
9381

    
9382
      _CheckNodeOnline(self, self.dst_node.name)
9383
      _CheckNodeNotDrained(self, self.dst_node.name)
9384

    
9385
      self._cds = None
9386
      self.dest_disk_info = None
9387
      self.dest_x509_ca = None
9388

    
9389
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9390
      self.dst_node = None
9391

    
9392
      if len(self.op.target_node) != len(self.instance.disks):
9393
        raise errors.OpPrereqError(("Received destination information for %s"
9394
                                    " disks, but instance %s has %s disks") %
9395
                                   (len(self.op.target_node), instance_name,
9396
                                    len(self.instance.disks)),
9397
                                   errors.ECODE_INVAL)
9398

    
9399
      cds = _GetClusterDomainSecret()
9400

    
9401
      # Check X509 key name
9402
      try:
9403
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9404
      except (TypeError, ValueError), err:
9405
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9406

    
9407
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9408
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9409
                                   errors.ECODE_INVAL)
9410

    
9411
      # Load and verify CA
9412
      try:
9413
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9414
      except OpenSSL.crypto.Error, err:
9415
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9416
                                   (err, ), errors.ECODE_INVAL)
9417

    
9418
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9419
      if errcode is not None:
9420
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9421
                                   (msg, ), errors.ECODE_INVAL)
9422

    
9423
      self.dest_x509_ca = cert
9424

    
9425
      # Verify target information
9426
      disk_info = []
9427
      for idx, disk_data in enumerate(self.op.target_node):
9428
        try:
9429
          (host, port, magic) = \
9430
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9431
        except errors.GenericError, err:
9432
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9433
                                     (idx, err), errors.ECODE_INVAL)
9434

    
9435
        disk_info.append((host, port, magic))
9436

    
9437
      assert len(disk_info) == len(self.op.target_node)
9438
      self.dest_disk_info = disk_info
9439

    
9440
    else:
9441
      raise errors.ProgrammerError("Unhandled export mode %r" %
9442
                                   self.op.mode)
9443

    
9444
    # instance disk type verification
9445
    # TODO: Implement export support for file-based disks
9446
    for disk in self.instance.disks:
9447
      if disk.dev_type == constants.LD_FILE:
9448
        raise errors.OpPrereqError("Export not supported for instances with"
9449
                                   " file-based disks", errors.ECODE_INVAL)
9450

    
9451
  def _CleanupExports(self, feedback_fn):
9452
    """Removes exports of current instance from all other nodes.
9453

9454
    If an instance in a cluster with nodes A..D was exported to node C, its
9455
    exports will be removed from the nodes A, B and D.
9456

9457
    """
9458
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9459

    
9460
    nodelist = self.cfg.GetNodeList()
9461
    nodelist.remove(self.dst_node.name)
9462

    
9463
    # on one-node clusters nodelist will be empty after the removal
9464
    # if we proceed the backup would be removed because OpQueryExports
9465
    # substitutes an empty list with the full cluster node list.
9466
    iname = self.instance.name
9467
    if nodelist:
9468
      feedback_fn("Removing old exports for instance %s" % iname)
9469
      exportlist = self.rpc.call_export_list(nodelist)
9470
      for node in exportlist:
9471
        if exportlist[node].fail_msg:
9472
          continue
9473
        if iname in exportlist[node].payload:
9474
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9475
          if msg:
9476
            self.LogWarning("Could not remove older export for instance %s"
9477
                            " on node %s: %s", iname, node, msg)
9478

    
9479
  def Exec(self, feedback_fn):
9480
    """Export an instance to an image in the cluster.
9481

9482
    """
9483
    assert self.op.mode in constants.EXPORT_MODES
9484

    
9485
    instance = self.instance
9486
    src_node = instance.primary_node
9487

    
9488
    if self.op.shutdown:
9489
      # shutdown the instance, but not the disks
9490
      feedback_fn("Shutting down instance %s" % instance.name)
9491
      result = self.rpc.call_instance_shutdown(src_node, instance,
9492
                                               self.op.shutdown_timeout)
9493
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9494
      result.Raise("Could not shutdown instance %s on"
9495
                   " node %s" % (instance.name, src_node))
9496

    
9497
    # set the disks ID correctly since call_instance_start needs the
9498
    # correct drbd minor to create the symlinks
9499
    for disk in instance.disks:
9500
      self.cfg.SetDiskID(disk, src_node)
9501

    
9502
    activate_disks = (not instance.admin_up)
9503

    
9504
    if activate_disks:
9505
      # Activate the instance disks if we'exporting a stopped instance
9506
      feedback_fn("Activating disks for %s" % instance.name)
9507
      _StartInstanceDisks(self, instance, None)
9508

    
9509
    try:
9510
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9511
                                                     instance)
9512

    
9513
      helper.CreateSnapshots()
9514
      try:
9515
        if (self.op.shutdown and instance.admin_up and
9516
            not self.op.remove_instance):
9517
          assert not activate_disks
9518
          feedback_fn("Starting instance %s" % instance.name)
9519
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9520
          msg = result.fail_msg
9521
          if msg:
9522
            feedback_fn("Failed to start instance: %s" % msg)
9523
            _ShutdownInstanceDisks(self, instance)
9524
            raise errors.OpExecError("Could not start instance: %s" % msg)
9525

    
9526
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9527
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9528
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9529
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9530
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9531

    
9532
          (key_name, _, _) = self.x509_key_name
9533

    
9534
          dest_ca_pem = \
9535
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9536
                                            self.dest_x509_ca)
9537

    
9538
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9539
                                                     key_name, dest_ca_pem,
9540
                                                     timeouts)
9541
      finally:
9542
        helper.Cleanup()
9543

    
9544
      # Check for backwards compatibility
9545
      assert len(dresults) == len(instance.disks)
9546
      assert compat.all(isinstance(i, bool) for i in dresults), \
9547
             "Not all results are boolean: %r" % dresults
9548

    
9549
    finally:
9550
      if activate_disks:
9551
        feedback_fn("Deactivating disks for %s" % instance.name)
9552
        _ShutdownInstanceDisks(self, instance)
9553

    
9554
    if not (compat.all(dresults) and fin_resu):
9555
      failures = []
9556
      if not fin_resu:
9557
        failures.append("export finalization")
9558
      if not compat.all(dresults):
9559
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9560
                               if not dsk)
9561
        failures.append("disk export: disk(s) %s" % fdsk)
9562

    
9563
      raise errors.OpExecError("Export failed, errors in %s" %
9564
                               utils.CommaJoin(failures))
9565

    
9566
    # At this point, the export was successful, we can cleanup/finish
9567

    
9568
    # Remove instance if requested
9569
    if self.op.remove_instance:
9570
      feedback_fn("Removing instance %s" % instance.name)
9571
      _RemoveInstance(self, feedback_fn, instance,
9572
                      self.op.ignore_remove_failures)
9573

    
9574
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9575
      self._CleanupExports(feedback_fn)
9576

    
9577
    return fin_resu, dresults
9578

    
9579

    
9580
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode-specific *_KEYS class attribute
      are required)
    - four buffer attributes (in_text, in_data, out_text, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
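
  # For reference, a rough sketch of the serialized input built above; the
  # top-level keys come from _ComputeClusterData and _BuildInputData, and the
  # values shown are placeholders only:
  #
  #   {
  #     "version": ...,
  #     "cluster_name": ...,
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {...},      # per-node data from _ComputeClusterData
  #     "instances": {...},  # per-instance data from _ComputeClusterData
  #     "request": {"type": <mode>, ...},  # the mode-specific request
  #   }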

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result