
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39

    
40
from ganeti import ssh
41
from ganeti import utils
42
from ganeti import errors
43
from ganeti import hypervisor
44
from ganeti import locking
45
from ganeti import constants
46
from ganeti import objects
47
from ganeti import serializer
48
from ganeti import ssconf
49
from ganeti import uidpool
50
from ganeti import compat
51
from ganeti import masterd
52
from ganeti import netutils
53

    
54
import ganeti.masterd.instance # pylint: disable-msg=W0611
55

    
56

    
57
# Modifiable default values; need to define these here before the
58
# actual LUs
59

    
60
def _EmptyList():
61
  """Returns an empty list.
62

63
  """
64
  return []
65

    
66

    
67
def _EmptyDict():
68
  """Returns an empty dict.
69

70
  """
71
  return {}
72

    
73

    
74
#: The without-default default value
75
_NoDefault = object()
76

    
77

    
78
#: The no-type (value too complex to check it in the type system)
79
_NoType = object()
80

    
81

    
82
# Some basic types
83
def _TNotNone(val):
84
  """Checks if the given value is not None.
85

86
  """
87
  return val is not None
88

    
89

    
90
def _TNone(val):
91
  """Checks if the given value is None.
92

93
  """
94
  return val is None
95

    
96

    
97
def _TBool(val):
98
  """Checks if the given value is a boolean.
99

100
  """
101
  return isinstance(val, bool)
102

    
103

    
104
def _TInt(val):
105
  """Checks if the given value is an integer.
106

107
  """
108
  return isinstance(val, int)
109

    
110

    
111
def _TFloat(val):
112
  """Checks if the given value is a float.
113

114
  """
115
  return isinstance(val, float)
116

    
117

    
118
def _TString(val):
119
  """Checks if the given value is a string.
120

121
  """
122
  return isinstance(val, basestring)
123

    
124

    
125
def _TTrue(val):
126
  """Checks if a given value evaluates to a boolean True value.
127

128
  """
129
  return bool(val)
130

    
131

    
132
def _TElemOf(target_list):
133
  """Builds a function that checks if a given value is a member of a list.
134

135
  """
136
  return lambda val: val in target_list
137

    
138

    
139
# Container types
140
def _TList(val):
141
  """Checks if the given value is a list.
142

143
  """
144
  return isinstance(val, list)
145

    
146

    
147
def _TDict(val):
148
  """Checks if the given value is a dictionary.
149

150
  """
151
  return isinstance(val, dict)
152

    
153

    
154
# Combinator types
155
def _TAnd(*args):
156
  """Combine multiple functions using an AND operation.
157

158
  """
159
  def fn(val):
160
    return compat.all(t(val) for t in args)
161
  return fn
162

    
163

    
164
def _TOr(*args):
165
  """Combine multiple functions using an AND operation.
166

167
  """
168
  def fn(val):
169
    return compat.any(t(val) for t in args)
170
  return fn
171

    
172

    
173
# Type aliases
174

    
175
#: a non-empty string
176
_TNonEmptyString = _TAnd(_TString, _TTrue)
177

    
178

    
179
#: a maybe non-empty string
180
_TMaybeString = _TOr(_TNonEmptyString, _TNone)
181

    
182

    
183
#: a maybe boolean (bool or none)
184
_TMaybeBool = _TOr(_TBool, _TNone)
185

    
186

    
187
#: a non-negative integer
188
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
189

    
190
#: a strictly positive integer
191
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
192

    
193

    
194
def _TListOf(my_type):
195
  """Checks if a given value is a list with all elements of the same type.
196

197
  """
198
  return _TAnd(_TList,
199
               lambda lst: compat.all(my_type(v) for v in lst))
200

    
201

    
202
def _TDictOf(key_type, val_type):
203
  """Checks a dict type for the type of its key/values.
204

205
  """
206
  return _TAnd(_TDict,
207
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
208
                                and compat.all(val_type(v)
209
                                               for v in my_dict.values())))
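# Illustrative sketch: the checkers above are plain callables and compose
# directly; behaviour with some made-up values would look like this:
#
#   _TListOf(_TNonEmptyString)(["node1.example.com", "node2.example.com"])
#   # -> True
#   _TListOf(_TNonEmptyString)(["node1.example.com", ""])
#   # -> False (the empty string fails _TTrue)
#   _TDictOf(_TNonEmptyString, _TInt)({"count": 3})
#   # -> True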
210

    
211

    
212
# Common opcode attributes
213

    
214
#: output fields for a query operation
215
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
216

    
217

    
218
#: the shutdown timeout
219
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
220
                     _TPositiveInt)
221

    
222
#: the force parameter
223
_PForce = ("force", False, _TBool)
224

    
225
#: a required instance name (for single-instance LUs)
226
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
227

    
228

    
229
#: a required node name (for single-node LUs)
230
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
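# Illustrative sketch of how the common attributes above are meant to be
# used: each is an (attribute name, default value, type check) tuple that an
# LU places in its _OP_PARAMS list (the extra parameter below is
# hypothetical), e.g.:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     ("ignore_failures", False, _TBool),
#     ]
#
# LogicalUnit.__init__ then fills in missing opcode attributes from the
# defaults and validates each value with the associated check.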
231

    
232

    
233
# End types
234
class LogicalUnit(object):
235
  """Logical Unit base class.
236

237
  Subclasses must follow these rules:
238
    - implement ExpandNames
239
    - implement CheckPrereq (except when tasklets are used)
240
    - implement Exec (except when tasklets are used)
241
    - implement BuildHooksEnv
242
    - redefine HPATH and HTYPE
243
    - optionally redefine their run requirements:
244
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
245

246
  Note that all commands require root permissions.
247

248
  @ivar dry_run_result: the value (if any) that will be returned to the caller
249
      in dry-run mode (signalled by opcode dry_run parameter)
250
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
251
      they should get if not already defined, and types they must match
252

253
  """
254
  HPATH = None
255
  HTYPE = None
256
  _OP_PARAMS = []
257
  REQ_BGL = True
258

    
259
  def __init__(self, processor, op, context, rpc):
260
    """Constructor for LogicalUnit.
261

262
    This needs to be overridden in derived classes in order to check op
263
    validity.
264

265
    """
266
    self.proc = processor
267
    self.op = op
268
    self.cfg = context.cfg
269
    self.context = context
270
    self.rpc = rpc
271
    # Dicts used to declare locking needs to mcpu
272
    self.needed_locks = None
273
    self.acquired_locks = {}
274
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
275
    self.add_locks = {}
276
    self.remove_locks = {}
277
    # Used to force good behavior when calling helper functions
278
    self.recalculate_locks = {}
279
    self.__ssh = None
280
    # logging
281
    self.Log = processor.Log # pylint: disable-msg=C0103
282
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
283
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
284
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
285
    # support for dry-run
286
    self.dry_run_result = None
287
    # support for generic debug attribute
288
    if (not hasattr(self.op, "debug_level") or
289
        not isinstance(self.op.debug_level, int)):
290
      self.op.debug_level = 0
291

    
292
    # Tasklets
293
    self.tasklets = None
294

    
295
    # The new kind-of-type-system
296
    op_id = self.op.OP_ID
297
    for attr_name, aval, test in self._OP_PARAMS:
298
      if not hasattr(op, attr_name):
299
        if aval == _NoDefault:
300
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
301
                                     (op_id, attr_name), errors.ECODE_INVAL)
302
        else:
303
          if callable(aval):
304
            dval = aval()
305
          else:
306
            dval = aval
307
          setattr(self.op, attr_name, dval)
308
      attr_val = getattr(op, attr_name)
309
      if test == _NoType:
310
        # no tests here
311
        continue
312
      if not callable(test):
313
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
314
                                     " given type is not a proper type (%s)" %
315
                                     (op_id, attr_name, test))
316
      if not test(attr_val):
317
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
318
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
319
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
320
                                   (op_id, attr_name), errors.ECODE_INVAL)
321

    
322
    self.CheckArguments()
323

    
324
  def __GetSSH(self):
325
    """Returns the SshRunner object
326

327
    """
328
    if not self.__ssh:
329
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
330
    return self.__ssh
331

    
332
  ssh = property(fget=__GetSSH)
333

    
334
  def CheckArguments(self):
335
    """Check syntactic validity for the opcode arguments.
336

337
    This method is for doing a simple syntactic check and ensuring
338
    validity of opcode parameters, without any cluster-related
339
    checks. While the same can be accomplished in ExpandNames and/or
340
    CheckPrereq, doing these separately is better because:
341

342
      - ExpandNames is left purely as a lock-related function
343
      - CheckPrereq is run after we have acquired locks (and possibly
344
        waited for them)
345

346
    The function is allowed to change the self.op attribute so that
347
    later methods no longer need to worry about missing parameters.
348

349
    """
350
    pass
351

    
352
  def ExpandNames(self):
353
    """Expand names for this LU.
354

355
    This method is called before starting to execute the opcode, and it should
356
    update all the parameters of the opcode to their canonical form (e.g. a
357
    short node name must be fully expanded after this method has successfully
358
    completed). This way locking, hooks, logging, etc. can work correctly.
359

360
    LUs which implement this method must also populate the self.needed_locks
361
    member, as a dict with lock levels as keys, and a list of needed lock names
362
    as values. Rules:
363

364
      - use an empty dict if you don't need any lock
365
      - if you don't need any lock at a particular level omit that level
366
      - don't put anything for the BGL level
367
      - if you want all locks at a level use locking.ALL_SET as a value
368

369
    If you need to share locks (rather than acquire them exclusively) at one
370
    level you can modify self.share_locks, setting a true value (usually 1) for
371
    that level. By default locks are not shared.
372

373
    This function can also define a list of tasklets, which then will be
374
    executed in order instead of the usual LU-level CheckPrereq and Exec
375
    functions, if those are not defined by the LU.
376

377
    Examples::
378

379
      # Acquire all nodes and one instance
380
      self.needed_locks = {
381
        locking.LEVEL_NODE: locking.ALL_SET,
382
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
383
      }
384
      # Acquire just two nodes
385
      self.needed_locks = {
386
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
387
      }
388
      # Acquire no locks
389
      self.needed_locks = {} # No, you can't leave it to the default value None
390

391
    """
392
    # The implementation of this method is mandatory only if the new LU is
393
    # concurrent, so that old LUs don't need to be changed all at the same
394
    # time.
395
    if self.REQ_BGL:
396
      self.needed_locks = {} # Exclusive LUs don't need locks.
397
    else:
398
      raise NotImplementedError
399

    
400
  def DeclareLocks(self, level):
401
    """Declare LU locking needs for a level
402

403
    While most LUs can just declare their locking needs at ExpandNames time,
404
    sometimes there's the need to calculate some locks after having acquired
405
    the ones before. This function is called just before acquiring locks at a
406
    particular level, but after acquiring the ones at lower levels, and permits
407
    such calculations. It can be used to modify self.needed_locks, and by
408
    default it does nothing.
409

410
    This function is only called if you have something already set in
411
    self.needed_locks for the level.
412

413
    @param level: Locking level which is going to be locked
414
    @type level: member of ganeti.locking.LEVELS
415

416
    """
417

    
418
  def CheckPrereq(self):
419
    """Check prerequisites for this LU.
420

421
    This method should check that the prerequisites for the execution
422
    of this LU are fulfilled. It can do internode communication, but
423
    it should be idempotent - no cluster or system changes are
424
    allowed.
425

426
    The method should raise errors.OpPrereqError in case something is
427
    not fulfilled. Its return value is ignored.
428

429
    This method should also update all the parameters of the opcode to
430
    their canonical form if it hasn't been done by ExpandNames before.
431

432
    """
433
    if self.tasklets is not None:
434
      for (idx, tl) in enumerate(self.tasklets):
435
        logging.debug("Checking prerequisites for tasklet %s/%s",
436
                      idx + 1, len(self.tasklets))
437
        tl.CheckPrereq()
438
    else:
439
      pass
440

    
441
  def Exec(self, feedback_fn):
442
    """Execute the LU.
443

444
    This method should implement the actual work. It should raise
445
    errors.OpExecError for failures that are somewhat dealt with in
446
    code, or expected.
447

448
    """
449
    if self.tasklets is not None:
450
      for (idx, tl) in enumerate(self.tasklets):
451
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
452
        tl.Exec(feedback_fn)
453
    else:
454
      raise NotImplementedError
455

    
456
  def BuildHooksEnv(self):
457
    """Build hooks environment for this LU.
458

459
    This method should return a three-element tuple consisting of: a dict
460
    containing the environment that will be used for running the
461
    specific hook for this LU, a list of node names on which the hook
462
    should run before the execution, and a list of node names on which
463
    the hook should run after the execution.
464

465
    The keys of the dict must not be prefixed with 'GANETI_', as this will
466
    be handled in the hooks runner. Also note additional keys will be
467
    added by the hooks runner. If the LU doesn't define any
468
    environment, an empty dict (and not None) should be returned.
469

470
    If there are no nodes, an empty list (and not None) should be returned.
471

472
    Note that if the HPATH for a LU class is None, this function will
473
    not be called.
474

475
    """
476
    raise NotImplementedError
477

    
478
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
479
    """Notify the LU about the results of its hooks.
480

481
    This method is called every time a hooks phase is executed, and notifies
482
    the Logical Unit about the hooks' result. The LU can then use it to alter
483
    its result based on the hooks.  By default the method does nothing and the
484
    previous result is passed back unchanged but any LU can define it if it
485
    wants to use the local cluster hook-scripts somehow.
486

487
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
488
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
489
    @param hook_results: the results of the multi-node hooks rpc call
490
    @param feedback_fn: function used to send feedback back to the caller
491
    @param lu_result: the previous Exec result this LU had, or None
492
        in the PRE phase
493
    @return: the new Exec result, based on the previous result
494
        and hook results
495

496
    """
497
    # API must be kept, thus we ignore the "unused argument" and "could
498
    # be a function" warnings
499
    # pylint: disable-msg=W0613,R0201
500
    return lu_result
501

    
502
  def _ExpandAndLockInstance(self):
503
    """Helper function to expand and lock an instance.
504

505
    Many LUs that work on an instance take its name in self.op.instance_name
506
    and need to expand it and then declare the expanded name for locking. This
507
    function does it, and then updates self.op.instance_name to the expanded
508
    name. It also initializes needed_locks as a dict, if this hasn't been done
509
    before.
510

511
    """
512
    if self.needed_locks is None:
513
      self.needed_locks = {}
514
    else:
515
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
516
        "_ExpandAndLockInstance called with instance-level locks set"
517
    self.op.instance_name = _ExpandInstanceName(self.cfg,
518
                                                self.op.instance_name)
519
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
520

    
521
  def _LockInstancesNodes(self, primary_only=False):
522
    """Helper function to declare instances' nodes for locking.
523

524
    This function should be called after locking one or more instances to lock
525
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
526
    with all primary or secondary nodes for instances already locked and
527
    present in self.needed_locks[locking.LEVEL_INSTANCE].
528

529
    It should be called from DeclareLocks, and for safety only works if
530
    self.recalculate_locks[locking.LEVEL_NODE] is set.
531

532
    In the future it may grow parameters to just lock some instance's nodes, or
533
    to just lock primary or secondary nodes, if needed.
534

535
    It should be called in DeclareLocks in a way similar to::
536

537
      if level == locking.LEVEL_NODE:
538
        self._LockInstancesNodes()
539

540
    @type primary_only: boolean
541
    @param primary_only: only lock primary nodes of locked instances
542

543
    """
544
    assert locking.LEVEL_NODE in self.recalculate_locks, \
545
      "_LockInstancesNodes helper function called with no nodes to recalculate"
546

    
547
    # TODO: check if we've really been called with the instance locks held
548

    
549
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
550
    # future we might want to have different behaviors depending on the value
551
    # of self.recalculate_locks[locking.LEVEL_NODE]
552
    wanted_nodes = []
553
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
554
      instance = self.context.cfg.GetInstanceInfo(instance_name)
555
      wanted_nodes.append(instance.primary_node)
556
      if not primary_only:
557
        wanted_nodes.extend(instance.secondary_nodes)
558

    
559
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
560
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
561
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
562
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
563

    
564
    del self.recalculate_locks[locking.LEVEL_NODE]
565

    
566

    
567
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
568
  """Simple LU which runs no hooks.
569

570
  This LU is intended as a parent for other LogicalUnits which will
571
  run no hooks, in order to reduce duplicate code.
572

573
  """
574
  HPATH = None
575
  HTYPE = None
576

    
577
  def BuildHooksEnv(self):
578
    """Empty BuildHooksEnv for NoHooksLu.
579

580
    This just raises an error.
581

582
    """
583
    assert False, "BuildHooksEnv called for NoHooksLUs"
584

    
585

    
586
class Tasklet:
587
  """Tasklet base class.
588

589
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
590
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
591
  tasklets know nothing about locks.
592

593
  Subclasses must follow these rules:
594
    - Implement CheckPrereq
595
    - Implement Exec
596

597
  """
598
  def __init__(self, lu):
599
    self.lu = lu
600

    
601
    # Shortcuts
602
    self.cfg = lu.cfg
603
    self.rpc = lu.rpc
604

    
605
  def CheckPrereq(self):
606
    """Check prerequisites for this tasklets.
607

608
    This method should check whether the prerequisites for the execution of
609
    this tasklet are fulfilled. It can do internode communication, but it
610
    should be idempotent - no cluster or system changes are allowed.
611

612
    The method should raise errors.OpPrereqError in case something is not
613
    fulfilled. Its return value is ignored.
614

615
    This method should also update all parameters to their canonical form if it
616
    hasn't been done before.
617

618
    """
619
    pass
620

    
621
  def Exec(self, feedback_fn):
622
    """Execute the tasklet.
623

624
    This method should implement the actual work. It should raise
625
    errors.OpExecError for failures that are somewhat dealt with in code, or
626
    expected.
627

628
    """
629
    raise NotImplementedError
630

    
631

    
632
def _GetWantedNodes(lu, nodes):
633
  """Returns list of checked and expanded node names.
634

635
  @type lu: L{LogicalUnit}
636
  @param lu: the logical unit on whose behalf we execute
637
  @type nodes: list
638
  @param nodes: list of node names or None for all nodes
639
  @rtype: list
640
  @return: the list of nodes, sorted
641
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
642

643
  """
644
  if not nodes:
645
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
646
      " non-empty list of nodes whose name is to be expanded.")
647

    
648
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
649
  return utils.NiceSort(wanted)
650

    
651

    
652
def _GetWantedInstances(lu, instances):
653
  """Returns list of checked and expanded instance names.
654

655
  @type lu: L{LogicalUnit}
656
  @param lu: the logical unit on whose behalf we execute
657
  @type instances: list
658
  @param instances: list of instance names or None for all instances
659
  @rtype: list
660
  @return: the list of instances, sorted
661
  @raise errors.OpPrereqError: if the instances parameter is wrong type
662
  @raise errors.OpPrereqError: if any of the passed instances is not found
663

664
  """
665
  if instances:
666
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
667
  else:
668
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
669
  return wanted
670

    
671

    
672
def _GetUpdatedParams(old_params, update_dict,
673
                      use_default=True, use_none=False):
674
  """Return the new version of a parameter dictionary.
675

676
  @type old_params: dict
677
  @param old_params: old parameters
678
  @type update_dict: dict
679
  @param update_dict: dict containing new parameter values, or
680
      constants.VALUE_DEFAULT to reset the parameter to its default
681
      value
682
  @type use_default: boolean
683
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
684
      values as 'to be deleted' values
685
  @type use_none: boolean
686
  @param use_none: whether to recognise C{None} values as 'to be
687
      deleted' values
688
  @rtype: dict
689
  @return: the new parameter dictionary
690

691
  """
692
  params_copy = copy.deepcopy(old_params)
693
  for key, val in update_dict.iteritems():
694
    if ((use_default and val == constants.VALUE_DEFAULT) or
695
        (use_none and val is None)):
696
      try:
697
        del params_copy[key]
698
      except KeyError:
699
        pass
700
    else:
701
      params_copy[key] = val
702
  return params_copy
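# A small worked example (parameter names made up): with use_default=True,
#
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#
# returns {"vcpus": 4}: "mem" is dropped (falling back to its default value)
# and "vcpus" is overwritten.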
703

    
704

    
705
def _CheckOutputFields(static, dynamic, selected):
706
  """Checks whether all selected fields are valid.
707

708
  @type static: L{utils.FieldSet}
709
  @param static: static fields set
710
  @type dynamic: L{utils.FieldSet}
711
  @param dynamic: dynamic fields set
712

713
  """
714
  f = utils.FieldSet()
715
  f.Extend(static)
716
  f.Extend(dynamic)
717

    
718
  delta = f.NonMatching(selected)
719
  if delta:
720
    raise errors.OpPrereqError("Unknown output fields selected: %s"
721
                               % ",".join(delta), errors.ECODE_INVAL)
722

    
723

    
724
def _CheckGlobalHvParams(params):
725
  """Validates that given hypervisor params are not global ones.
726

727
  This will ensure that instances don't get customised versions of
728
  global params.
729

730
  """
731
  used_globals = constants.HVC_GLOBALS.intersection(params)
732
  if used_globals:
733
    msg = ("The following hypervisor parameters are global and cannot"
734
           " be customized at instance level, please modify them at"
735
           " cluster level: %s" % utils.CommaJoin(used_globals))
736
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
737

    
738

    
739
def _CheckNodeOnline(lu, node):
740
  """Ensure that a given node is online.
741

742
  @param lu: the LU on behalf of which we make the check
743
  @param node: the node to check
744
  @raise errors.OpPrereqError: if the node is offline
745

746
  """
747
  if lu.cfg.GetNodeInfo(node).offline:
748
    raise errors.OpPrereqError("Can't use offline node %s" % node,
749
                               errors.ECODE_INVAL)
750

    
751

    
752
def _CheckNodeNotDrained(lu, node):
753
  """Ensure that a given node is not drained.
754

755
  @param lu: the LU on behalf of which we make the check
756
  @param node: the node to check
757
  @raise errors.OpPrereqError: if the node is drained
758

759
  """
760
  if lu.cfg.GetNodeInfo(node).drained:
761
    raise errors.OpPrereqError("Can't use drained node %s" % node,
762
                               errors.ECODE_INVAL)
763

    
764

    
765
def _CheckNodeHasOS(lu, node, os_name, force_variant):
766
  """Ensure that a node supports a given OS.
767

768
  @param lu: the LU on behalf of which we make the check
769
  @param node: the node to check
770
  @param os_name: the OS to query about
771
  @param force_variant: whether to ignore variant errors
772
  @raise errors.OpPrereqError: if the node is not supporting the OS
773

774
  """
775
  result = lu.rpc.call_os_get(node, os_name)
776
  result.Raise("OS '%s' not in supported OS list for node %s" %
777
               (os_name, node),
778
               prereq=True, ecode=errors.ECODE_INVAL)
779
  if not force_variant:
780
    _CheckOSVariant(result.payload, os_name)
781

    
782

    
783
def _RequireFileStorage():
784
  """Checks that file storage is enabled.
785

786
  @raise errors.OpPrereqError: when file storage is disabled
787

788
  """
789
  if not constants.ENABLE_FILE_STORAGE:
790
    raise errors.OpPrereqError("File storage disabled at configure time",
791
                               errors.ECODE_INVAL)
792

    
793

    
794
def _CheckDiskTemplate(template):
795
  """Ensure a given disk template is valid.
796

797
  """
798
  if template not in constants.DISK_TEMPLATES:
799
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
800
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
801
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
802
  if template == constants.DT_FILE:
803
    _RequireFileStorage()
804
  return True
805

    
806

    
807
def _CheckStorageType(storage_type):
808
  """Ensure a given storage type is valid.
809

810
  """
811
  if storage_type not in constants.VALID_STORAGE_TYPES:
812
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
813
                               errors.ECODE_INVAL)
814
  if storage_type == constants.ST_FILE:
815
    _RequireFileStorage()
816
  return True
817

    
818

    
819
def _GetClusterDomainSecret():
820
  """Reads the cluster domain secret.
821

822
  """
823
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
824
                               strict=True)
825

    
826

    
827
def _CheckInstanceDown(lu, instance, reason):
828
  """Ensure that an instance is not running."""
829
  if instance.admin_up:
830
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
831
                               (instance.name, reason), errors.ECODE_STATE)
832

    
833
  pnode = instance.primary_node
834
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
835
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
836
              prereq=True, ecode=errors.ECODE_ENVIRON)
837

    
838
  if instance.name in ins_l.payload:
839
    raise errors.OpPrereqError("Instance %s is running, %s" %
840
                               (instance.name, reason), errors.ECODE_STATE)
841

    
842

    
843
def _ExpandItemName(fn, name, kind):
844
  """Expand an item name.
845

846
  @param fn: the function to use for expansion
847
  @param name: requested item name
848
  @param kind: text description ('Node' or 'Instance')
849
  @return: the resolved (full) name
850
  @raise errors.OpPrereqError: if the item is not found
851

852
  """
853
  full_name = fn(name)
854
  if full_name is None:
855
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
856
                               errors.ECODE_NOENT)
857
  return full_name
858

    
859

    
860
def _ExpandNodeName(cfg, name):
861
  """Wrapper over L{_ExpandItemName} for nodes."""
862
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
863

    
864

    
865
def _ExpandInstanceName(cfg, name):
866
  """Wrapper over L{_ExpandItemName} for instance."""
867
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
868

    
869

    
870
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
871
                          memory, vcpus, nics, disk_template, disks,
872
                          bep, hvp, hypervisor_name):
873
  """Builds instance related env variables for hooks
874

875
  This builds the hook environment from individual variables.
876

877
  @type name: string
878
  @param name: the name of the instance
879
  @type primary_node: string
880
  @param primary_node: the name of the instance's primary node
881
  @type secondary_nodes: list
882
  @param secondary_nodes: list of secondary nodes as strings
883
  @type os_type: string
884
  @param os_type: the name of the instance's OS
885
  @type status: boolean
886
  @param status: the should_run status of the instance
887
  @type memory: string
888
  @param memory: the memory size of the instance
889
  @type vcpus: string
890
  @param vcpus: the count of VCPUs the instance has
891
  @type nics: list
892
  @param nics: list of tuples (ip, mac, mode, link) representing
893
      the NICs the instance has
894
  @type disk_template: string
895
  @param disk_template: the disk template of the instance
896
  @type disks: list
897
  @param disks: the list of (size, mode) pairs
898
  @type bep: dict
899
  @param bep: the backend parameters for the instance
900
  @type hvp: dict
901
  @param hvp: the hypervisor parameters for the instance
902
  @type hypervisor_name: string
903
  @param hypervisor_name: the hypervisor for the instance
904
  @rtype: dict
905
  @return: the hook environment for this instance
906

907
  """
908
  if status:
909
    str_status = "up"
910
  else:
911
    str_status = "down"
912
  env = {
913
    "OP_TARGET": name,
914
    "INSTANCE_NAME": name,
915
    "INSTANCE_PRIMARY": primary_node,
916
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
917
    "INSTANCE_OS_TYPE": os_type,
918
    "INSTANCE_STATUS": str_status,
919
    "INSTANCE_MEMORY": memory,
920
    "INSTANCE_VCPUS": vcpus,
921
    "INSTANCE_DISK_TEMPLATE": disk_template,
922
    "INSTANCE_HYPERVISOR": hypervisor_name,
923
  }
924

    
925
  if nics:
926
    nic_count = len(nics)
927
    for idx, (ip, mac, mode, link) in enumerate(nics):
928
      if ip is None:
929
        ip = ""
930
      env["INSTANCE_NIC%d_IP" % idx] = ip
931
      env["INSTANCE_NIC%d_MAC" % idx] = mac
932
      env["INSTANCE_NIC%d_MODE" % idx] = mode
933
      env["INSTANCE_NIC%d_LINK" % idx] = link
934
      if mode == constants.NIC_MODE_BRIDGED:
935
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
936
  else:
937
    nic_count = 0
938

    
939
  env["INSTANCE_NIC_COUNT"] = nic_count
940

    
941
  if disks:
942
    disk_count = len(disks)
943
    for idx, (size, mode) in enumerate(disks):
944
      env["INSTANCE_DISK%d_SIZE" % idx] = size
945
      env["INSTANCE_DISK%d_MODE" % idx] = mode
946
  else:
947
    disk_count = 0
948

    
949
  env["INSTANCE_DISK_COUNT"] = disk_count
950

    
951
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
952
    for key, value in source.items():
953
      env["INSTANCE_%s_%s" % (kind, key)] = value
954

    
955
  return env
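# For a single-NIC, single-disk instance the environment built above would
# contain entries along these lines (values invented for illustration):
#
#   INSTANCE_NAME=inst1.example.com   INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1              INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1             INSTANCE_DISK0_SIZE=1024
#
# plus one INSTANCE_BE_* and INSTANCE_HV_* entry per backend/hypervisor
# parameter; the GANETI_ prefix is added later by the hooks runner.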
956

    
957

    
958
def _NICListToTuple(lu, nics):
959
  """Build a list of nic information tuples.
960

961
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
962
  value in LUQueryInstanceData.
963

964
  @type lu:  L{LogicalUnit}
965
  @param lu: the logical unit on whose behalf we execute
966
  @type nics: list of L{objects.NIC}
967
  @param nics: list of nics to convert to hooks tuples
968

969
  """
970
  hooks_nics = []
971
  cluster = lu.cfg.GetClusterInfo()
972
  for nic in nics:
973
    ip = nic.ip
974
    mac = nic.mac
975
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
976
    mode = filled_params[constants.NIC_MODE]
977
    link = filled_params[constants.NIC_LINK]
978
    hooks_nics.append((ip, mac, mode, link))
979
  return hooks_nics
980

    
981

    
982
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
983
  """Builds instance related env variables for hooks from an object.
984

985
  @type lu: L{LogicalUnit}
986
  @param lu: the logical unit on whose behalf we execute
987
  @type instance: L{objects.Instance}
988
  @param instance: the instance for which we should build the
989
      environment
990
  @type override: dict
991
  @param override: dictionary with key/values that will override
992
      our values
993
  @rtype: dict
994
  @return: the hook environment dictionary
995

996
  """
997
  cluster = lu.cfg.GetClusterInfo()
998
  bep = cluster.FillBE(instance)
999
  hvp = cluster.FillHV(instance)
1000
  args = {
1001
    'name': instance.name,
1002
    'primary_node': instance.primary_node,
1003
    'secondary_nodes': instance.secondary_nodes,
1004
    'os_type': instance.os,
1005
    'status': instance.admin_up,
1006
    'memory': bep[constants.BE_MEMORY],
1007
    'vcpus': bep[constants.BE_VCPUS],
1008
    'nics': _NICListToTuple(lu, instance.nics),
1009
    'disk_template': instance.disk_template,
1010
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1011
    'bep': bep,
1012
    'hvp': hvp,
1013
    'hypervisor_name': instance.hypervisor,
1014
  }
1015
  if override:
1016
    args.update(override)
1017
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1018

    
1019

    
1020
def _AdjustCandidatePool(lu, exceptions):
1021
  """Adjust the candidate pool after node operations.
1022

1023
  """
1024
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1025
  if mod_list:
1026
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1027
               utils.CommaJoin(node.name for node in mod_list))
1028
    for name in mod_list:
1029
      lu.context.ReaddNode(name)
1030
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1031
  if mc_now > mc_max:
1032
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1033
               (mc_now, mc_max))
1034

    
1035

    
1036
def _DecideSelfPromotion(lu, exceptions=None):
1037
  """Decide whether I should promote myself as a master candidate.
1038

1039
  """
1040
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042
  # the new node will increase mc_max by one, so:
1043
  mc_should = min(mc_should + 1, cp_size)
1044
  return mc_now < mc_should
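# Rough numeric sketch (numbers invented): with candidate_pool_size = 10,
# mc_now = 3 and mc_should = 3, the prospective new node bumps mc_should to
# min(3 + 1, 10) = 4, so 3 < 4 and the function returns True (promote).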
1045

    
1046

    
1047
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1048
  """Check that the brigdes needed by a list of nics exist.
1049

1050
  """
1051
  cluster = lu.cfg.GetClusterInfo()
1052
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1053
  brlist = [params[constants.NIC_LINK] for params in paramslist
1054
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1055
  if brlist:
1056
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1057
    result.Raise("Error checking bridges on destination node '%s'" %
1058
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1059

    
1060

    
1061
def _CheckInstanceBridgesExist(lu, instance, node=None):
1062
  """Check that the brigdes needed by an instance exist.
1063

1064
  """
1065
  if node is None:
1066
    node = instance.primary_node
1067
  _CheckNicsBridgesExist(lu, instance.nics, node)
1068

    
1069

    
1070
def _CheckOSVariant(os_obj, name):
1071
  """Check whether an OS name conforms to the os variants specification.
1072

1073
  @type os_obj: L{objects.OS}
1074
  @param os_obj: OS object to check
1075
  @type name: string
1076
  @param name: OS name passed by the user, to check for validity
1077

1078
  """
1079
  if not os_obj.supported_variants:
1080
    return
1081
  try:
1082
    variant = name.split("+", 1)[1]
1083
  except IndexError:
1084
    raise errors.OpPrereqError("OS name must include a variant",
1085
                               errors.ECODE_INVAL)
1086

    
1087
  if variant not in os_obj.supported_variants:
1088
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1089

    
1090

    
1091
def _GetNodeInstancesInner(cfg, fn):
1092
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1093

    
1094

    
1095
def _GetNodeInstances(cfg, node_name):
1096
  """Returns a list of all primary and secondary instances on a node.
1097

1098
  """
1099

    
1100
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1101

    
1102

    
1103
def _GetNodePrimaryInstances(cfg, node_name):
1104
  """Returns primary instances on a node.
1105

1106
  """
1107
  return _GetNodeInstancesInner(cfg,
1108
                                lambda inst: node_name == inst.primary_node)
1109

    
1110

    
1111
def _GetNodeSecondaryInstances(cfg, node_name):
1112
  """Returns secondary instances on a node.
1113

1114
  """
1115
  return _GetNodeInstancesInner(cfg,
1116
                                lambda inst: node_name in inst.secondary_nodes)
1117

    
1118

    
1119
def _GetStorageTypeArgs(cfg, storage_type):
1120
  """Returns the arguments for a storage type.
1121

1122
  """
1123
  # Special case for file storage
1124
  if storage_type == constants.ST_FILE:
1125
    # storage.FileStorage wants a list of storage directories
1126
    return [[cfg.GetFileStorageDir()]]
1127

    
1128
  return []
1129

    
1130

    
1131
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1132
  faulty = []
1133

    
1134
  for dev in instance.disks:
1135
    cfg.SetDiskID(dev, node_name)
1136

    
1137
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1138
  result.Raise("Failed to get disk status from node %s" % node_name,
1139
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1140

    
1141
  for idx, bdev_status in enumerate(result.payload):
1142
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1143
      faulty.append(idx)
1144

    
1145
  return faulty
1146

    
1147

    
1148
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1149
  """Check the sanity of iallocator and node arguments and use the
1150
  cluster-wide iallocator if appropriate.
1151

1152
  Check that at most one of (iallocator, node) is specified. If none is
1153
  specified, then the LU's opcode's iallocator slot is filled with the
1154
  cluster-wide default iallocator.
1155

1156
  @type iallocator_slot: string
1157
  @param iallocator_slot: the name of the opcode iallocator slot
1158
  @type node_slot: string
1159
  @param node_slot: the name of the opcode target node slot
1160

1161
  """
1162
  node = getattr(lu.op, node_slot, None)
1163
  iallocator = getattr(lu.op, iallocator_slot, None)
1164

    
1165
  if node is not None and iallocator is not None:
1166
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1167
                               errors.ECODE_INVAL)
1168
  elif node is None and iallocator is None:
1169
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1170
    if default_iallocator:
1171
      setattr(lu.op, iallocator_slot, default_iallocator)
1172
    else:
1173
      raise errors.OpPrereqError("No iallocator or node given and no"
1174
                                 " cluster-wide default iallocator found."
1175
                                 " Please specify either an iallocator or a"
1176
                                 " node, or set a cluster-wide default"
1177
                                 " iallocator.")
1178

    
1179

    
1180
class LUPostInitCluster(LogicalUnit):
1181
  """Logical unit for running hooks after cluster initialization.
1182

1183
  """
1184
  HPATH = "cluster-init"
1185
  HTYPE = constants.HTYPE_CLUSTER
1186

    
1187
  def BuildHooksEnv(self):
1188
    """Build hooks env.
1189

1190
    """
1191
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1192
    mn = self.cfg.GetMasterNode()
1193
    return env, [], [mn]
1194

    
1195
  def Exec(self, feedback_fn):
1196
    """Nothing to do.
1197

1198
    """
1199
    return True
1200

    
1201

    
1202
class LUDestroyCluster(LogicalUnit):
1203
  """Logical unit for destroying the cluster.
1204

1205
  """
1206
  HPATH = "cluster-destroy"
1207
  HTYPE = constants.HTYPE_CLUSTER
1208

    
1209
  def BuildHooksEnv(self):
1210
    """Build hooks env.
1211

1212
    """
1213
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1214
    return env, [], []
1215

    
1216
  def CheckPrereq(self):
1217
    """Check prerequisites.
1218

1219
    This checks whether the cluster is empty.
1220

1221
    Any errors are signaled by raising errors.OpPrereqError.
1222

1223
    """
1224
    master = self.cfg.GetMasterNode()
1225

    
1226
    nodelist = self.cfg.GetNodeList()
1227
    if len(nodelist) != 1 or nodelist[0] != master:
1228
      raise errors.OpPrereqError("There are still %d node(s) in"
1229
                                 " this cluster." % (len(nodelist) - 1),
1230
                                 errors.ECODE_INVAL)
1231
    instancelist = self.cfg.GetInstanceList()
1232
    if instancelist:
1233
      raise errors.OpPrereqError("There are still %d instance(s) in"
1234
                                 " this cluster." % len(instancelist),
1235
                                 errors.ECODE_INVAL)
1236

    
1237
  def Exec(self, feedback_fn):
1238
    """Destroys the cluster.
1239

1240
    """
1241
    master = self.cfg.GetMasterNode()
1242
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1243

    
1244
    # Run post hooks on master node before it's removed
1245
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1246
    try:
1247
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1248
    except:
1249
      # pylint: disable-msg=W0702
1250
      self.LogWarning("Errors occurred running hooks on %s" % master)
1251

    
1252
    result = self.rpc.call_node_stop_master(master, False)
1253
    result.Raise("Could not disable the master role")
1254

    
1255
    if modify_ssh_setup:
1256
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1257
      utils.CreateBackup(priv_key)
1258
      utils.CreateBackup(pub_key)
1259

    
1260
    return master
1261

    
1262

    
1263
def _VerifyCertificate(filename):
1264
  """Verifies a certificate for LUVerifyCluster.
1265

1266
  @type filename: string
1267
  @param filename: Path to PEM file
1268

1269
  """
1270
  try:
1271
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1272
                                           utils.ReadFile(filename))
1273
  except Exception, err: # pylint: disable-msg=W0703
1274
    return (LUVerifyCluster.ETYPE_ERROR,
1275
            "Failed to load X509 certificate %s: %s" % (filename, err))
1276

    
1277
  (errcode, msg) = \
1278
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1279
                                constants.SSL_CERT_EXPIRATION_ERROR)
1280

    
1281
  if msg:
1282
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1283
  else:
1284
    fnamemsg = None
1285

    
1286
  if errcode is None:
1287
    return (None, fnamemsg)
1288
  elif errcode == utils.CERT_WARNING:
1289
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1290
  elif errcode == utils.CERT_ERROR:
1291
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1292

    
1293
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1294

    
1295

    
1296
class LUVerifyCluster(LogicalUnit):
1297
  """Verifies the cluster status.
1298

1299
  """
1300
  HPATH = "cluster-verify"
1301
  HTYPE = constants.HTYPE_CLUSTER
1302
  _OP_PARAMS = [
1303
    ("skip_checks", _EmptyList,
1304
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1305
    ("verbose", False, _TBool),
1306
    ("error_codes", False, _TBool),
1307
    ("debug_simulate_errors", False, _TBool),
1308
    ]
1309
  REQ_BGL = False
1310

    
1311
  TCLUSTER = "cluster"
1312
  TNODE = "node"
1313
  TINSTANCE = "instance"
1314

    
1315
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1316
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1317
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1318
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1319
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1320
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1322
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1323
  ENODEDRBD = (TNODE, "ENODEDRBD")
1324
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1325
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1326
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1327
  ENODEHV = (TNODE, "ENODEHV")
1328
  ENODELVM = (TNODE, "ENODELVM")
1329
  ENODEN1 = (TNODE, "ENODEN1")
1330
  ENODENET = (TNODE, "ENODENET")
1331
  ENODEOS = (TNODE, "ENODEOS")
1332
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1333
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1334
  ENODERPC = (TNODE, "ENODERPC")
1335
  ENODESSH = (TNODE, "ENODESSH")
1336
  ENODEVERSION = (TNODE, "ENODEVERSION")
1337
  ENODESETUP = (TNODE, "ENODESETUP")
1338
  ENODETIME = (TNODE, "ENODETIME")
1339

    
1340
  ETYPE_FIELD = "code"
1341
  ETYPE_ERROR = "ERROR"
1342
  ETYPE_WARNING = "WARNING"
1343

    
1344
  class NodeImage(object):
1345
    """A class representing the logical and physical status of a node.
1346

1347
    @type name: string
1348
    @ivar name: the node name to which this object refers
1349
    @ivar volumes: a structure as returned from
1350
        L{ganeti.backend.GetVolumeList} (runtime)
1351
    @ivar instances: a list of running instances (runtime)
1352
    @ivar pinst: list of configured primary instances (config)
1353
    @ivar sinst: list of configured secondary instances (config)
1354
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1355
        of this node (config)
1356
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1357
    @ivar dfree: free disk, as reported by the node (runtime)
1358
    @ivar offline: the offline status (config)
1359
    @type rpc_fail: boolean
1360
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1361
        not whether the individual keys were correct) (runtime)
1362
    @type lvm_fail: boolean
1363
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1364
    @type hyp_fail: boolean
1365
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1366
    @type ghost: boolean
1367
    @ivar ghost: whether this is a known node or not (config)
1368
    @type os_fail: boolean
1369
    @ivar os_fail: whether the RPC call didn't return valid OS data
1370
    @type oslist: list
1371
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1372

1373
    """
1374
    def __init__(self, offline=False, name=None):
1375
      self.name = name
1376
      self.volumes = {}
1377
      self.instances = []
1378
      self.pinst = []
1379
      self.sinst = []
1380
      self.sbp = {}
1381
      self.mfree = 0
1382
      self.dfree = 0
1383
      self.offline = offline
1384
      self.rpc_fail = False
1385
      self.lvm_fail = False
1386
      self.hyp_fail = False
1387
      self.ghost = False
1388
      self.os_fail = False
1389
      self.oslist = {}
1390

    
1391
  def ExpandNames(self):
1392
    self.needed_locks = {
1393
      locking.LEVEL_NODE: locking.ALL_SET,
1394
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1395
    }
1396
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1397

    
1398
  def _Error(self, ecode, item, msg, *args, **kwargs):
1399
    """Format an error message.
1400

1401
    Based on the opcode's error_codes parameter, either format a
1402
    parseable error code, or a simpler error string.
1403

1404
    This must be called only from Exec and functions called from Exec.
1405

1406
    """
1407
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1408
    itype, etxt = ecode
1409
    # first complete the msg
1410
    if args:
1411
      msg = msg % args
1412
    # then format the whole message
1413
    if self.op.error_codes:
1414
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1415
    else:
1416
      if item:
1417
        item = " " + item
1418
      else:
1419
        item = ""
1420
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1421
    # and finally report it via the feedback_fn
1422
    self._feedback_fn("  - %s" % msg)
1423

    
1424
  def _ErrorIf(self, cond, *args, **kwargs):
1425
    """Log an error message if the passed condition is True.
1426

1427
    """
1428
    cond = bool(cond) or self.op.debug_simulate_errors
1429
    if cond:
1430
      self._Error(*args, **kwargs)
1431
    # do not mark the operation as failed for WARN cases only
1432
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1433
      self.bad = self.bad or cond
1434

    
1435
  def _VerifyNode(self, ninfo, nresult):
1436
    """Perform some basic validation on data returned from a node.
1437

1438
    - check the result data structure is well formed and has all the mandatory
1439
      fields
1440
    - check ganeti version
1441

1442
    @type ninfo: L{objects.Node}
1443
    @param ninfo: the node to check
1444
    @param nresult: the results from the node
1445
    @rtype: boolean
1446
    @return: whether overall this call was successful (and we can expect
1447
         reasonable values in the response)
1448

1449
    """
1450
    node = ninfo.name
1451
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1452

    
1453
    # main result, nresult should be a non-empty dict
1454
    test = not nresult or not isinstance(nresult, dict)
1455
    _ErrorIf(test, self.ENODERPC, node,
1456
                  "unable to verify node: no data returned")
1457
    if test:
1458
      return False
1459

    
1460
    # compares ganeti version
1461
    local_version = constants.PROTOCOL_VERSION
1462
    remote_version = nresult.get("version", None)
1463
    test = not (remote_version and
1464
                isinstance(remote_version, (list, tuple)) and
1465
                len(remote_version) == 2)
1466
    _ErrorIf(test, self.ENODERPC, node,
1467
             "connection to node returned invalid data")
1468
    if test:
1469
      return False
1470

    
1471
    test = local_version != remote_version[0]
1472
    _ErrorIf(test, self.ENODEVERSION, node,
1473
             "incompatible protocol versions: master %s,"
1474
             " node %s", local_version, remote_version[0])
1475
    if test:
1476
      return False
1477

    
1478
    # node seems compatible, we can actually try to look into its results
1479

    
1480
    # full package version
1481
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1482
                  self.ENODEVERSION, node,
1483
                  "software version mismatch: master %s, node %s",
1484
                  constants.RELEASE_VERSION, remote_version[1],
1485
                  code=self.ETYPE_WARNING)
1486

    
1487
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1488
    if isinstance(hyp_result, dict):
1489
      for hv_name, hv_result in hyp_result.iteritems():
1490
        test = hv_result is not None
1491
        _ErrorIf(test, self.ENODEHV, node,
1492
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1493

    
1494

    
1495
    test = nresult.get(constants.NV_NODESETUP,
1496
                           ["Missing NODESETUP results"])
1497
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1498
             "; ".join(test))
1499

    
1500
    return True
1501

    
1502
  def _VerifyNodeTime(self, ninfo, nresult,
1503
                      nvinfo_starttime, nvinfo_endtime):
1504
    """Check the node time.
1505

1506
    @type ninfo: L{objects.Node}
1507
    @param ninfo: the node to check
1508
    @param nresult: the remote results for the node
1509
    @param nvinfo_starttime: the start time of the RPC call
1510
    @param nvinfo_endtime: the end time of the RPC call
1511

1512
    """
1513
    node = ninfo.name
1514
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1515

    
1516
    ntime = nresult.get(constants.NV_TIME, None)
1517
    try:
1518
      ntime_merged = utils.MergeTime(ntime)
1519
    except (ValueError, TypeError):
1520
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1521
      return
1522

    
1523
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1524
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1525
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1526
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1527
    else:
1528
      ntime_diff = None
1529

    
1530
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1531
             "Node time diverges by at least %s from master node time",
1532
             ntime_diff)
1533

    
1534
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1535
    """Check the node time.
1536

1537
    @type ninfo: L{objects.Node}
1538
    @param ninfo: the node to check
1539
    @param nresult: the remote results for the node
1540
    @param vg_name: the configured VG name
1541

1542
    """
1543
    if vg_name is None:
1544
      return
1545

    
1546
    node = ninfo.name
1547
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1548

    
1549
    # checks vg existence and size > 20G
1550
    vglist = nresult.get(constants.NV_VGLIST, None)
1551
    test = not vglist
1552
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1553
    if not test:
1554
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1555
                                            constants.MIN_VG_SIZE)
1556
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1557

    
1558
    # check pv names
1559
    pvlist = nresult.get(constants.NV_PVLIST, None)
1560
    test = pvlist is None
1561
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1562
    if not test:
1563
      # check that ':' is not present in PV names, since it's a
1564
      # special character for lvcreate (denotes the range of PEs to
1565
      # use on the PV)
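      # (for reference, lvcreate's PV arguments look roughly like
      # "/dev/sda1:0-999", i.e. "pv:first_pe-last_pe", so a ':' inside a PV
      # name would be misparsed; the exact syntax shown is illustrative)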
1566
      for _, pvname, owner_vg in pvlist:
1567
        test = ":" in pvname
1568
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1569
                 " '%s' of VG '%s'", pvname, owner_vg)
1570

    
1571
  def _VerifyNodeNetwork(self, ninfo, nresult):
1572
    """Check the node time.
1573

1574
    @type ninfo: L{objects.Node}
1575
    @param ninfo: the node to check
1576
    @param nresult: the remote results for the node
1577

1578
    """
1579
    node = ninfo.name
1580
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1581

    
1582
    test = constants.NV_NODELIST not in nresult
1583
    _ErrorIf(test, self.ENODESSH, node,
1584
             "node hasn't returned node ssh connectivity data")
1585
    if not test:
1586
      if nresult[constants.NV_NODELIST]:
1587
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1588
          _ErrorIf(True, self.ENODESSH, node,
1589
                   "ssh communication with node '%s': %s", a_node, a_msg)
1590

    
1591
    test = constants.NV_NODENETTEST not in nresult
1592
    _ErrorIf(test, self.ENODENET, node,
1593
             "node hasn't returned node tcp connectivity data")
1594
    if not test:
1595
      if nresult[constants.NV_NODENETTEST]:
1596
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1597
        for anode in nlist:
1598
          _ErrorIf(True, self.ENODENET, node,
1599
                   "tcp communication with node '%s': %s",
1600
                   anode, nresult[constants.NV_NODENETTEST][anode])
1601

    
1602
    test = constants.NV_MASTERIP not in nresult
1603
    _ErrorIf(test, self.ENODENET, node,
1604
             "node hasn't returned node master IP reachability data")
1605
    if not test:
1606
      if not nresult[constants.NV_MASTERIP]:
1607
        if node == self.master_node:
1608
          msg = "the master node cannot reach the master IP (not configured?)"
1609
        else:
1610
          msg = "cannot reach the master IP"
1611
        _ErrorIf(True, self.ENODENET, node, msg)
1612

    
1613

    
1614
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1615
    """Verify an instance.
1616

1617
    This function checks to see if the required block devices are
1618
    available on the instance's node.
1619

1620
    """
1621
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1622
    node_current = instanceconfig.primary_node
1623

    
1624
    node_vol_should = {}
1625
    instanceconfig.MapLVsByNode(node_vol_should)
1626

    
1627
    for node in node_vol_should:
1628
      n_img = node_image[node]
1629
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1630
        # ignore missing volumes on offline or broken nodes
1631
        continue
1632
      for volume in node_vol_should[node]:
1633
        test = volume not in n_img.volumes
1634
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1635
                 "volume %s missing on node %s", volume, node)
1636

    
1637
    if instanceconfig.admin_up:
1638
      pri_img = node_image[node_current]
1639
      test = instance not in pri_img.instances and not pri_img.offline
1640
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1641
               "instance not running on its primary node %s",
1642
               node_current)
1643

    
1644
    for node, n_img in node_image.items():
1645
      if node != node_current:
1646
        test = instance in n_img.instances
1647
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1648
                 "instance should not run on node %s", node)
1649

    
1650
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1651
    """Verify if there are any unknown volumes in the cluster.
1652

1653
    The .os, .swap and backup volumes are ignored. All other volumes are
1654
    reported as unknown.
1655

1656
    """
1657
    for node, n_img in node_image.items():
1658
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1659
        # skip non-healthy nodes
1660
        continue
1661
      for volume in n_img.volumes:
1662
        test = (node not in node_vol_should or
1663
                volume not in node_vol_should[node])
1664
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1665
                      "volume %s is unknown", volume)
1666

    
1667
  def _VerifyOrphanInstances(self, instancelist, node_image):
1668
    """Verify the list of running instances.
1669

1670
    This checks what instances are running but unknown to the cluster.
1671

1672
    """
1673
    for node, n_img in node_image.items():
1674
      for o_inst in n_img.instances:
1675
        test = o_inst not in instancelist
1676
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1677
                      "instance %s on node %s should not exist", o_inst, node)
1678

    
1679
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1680
    """Verify N+1 Memory Resilience.
1681

1682
    Check that if one single node dies we can still start all the
1683
    instances it was primary for.
1684

1685
    """
1686
    for node, n_img in node_image.items():
1687
      # This code checks that every node which is now listed as
1688
      # secondary has enough memory to host all instances it is
1689
      # supposed to, should a single other node in the cluster fail.
1690
      # FIXME: not ready for failover to an arbitrary node
1691
      # FIXME: does not support file-backed instances
1692
      # WARNING: we currently take into account down instances as well
1693
      # as up ones, considering that even if they're down someone
1694
      # might want to start them even in the event of a node failure.
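      # Worked example of the check below (names and numbers are made up):
      # if node X has n_img.sbp = {"nodeA": ["inst1", "inst2"]} and both
      # instances are auto-balanced with BE_MEMORY of 512 and 1024, then
      # needed_mem is 1536; an mfree of 1024 on X therefore triggers the
      # N+1 error for peer node "nodeA".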
1695
      for prinode, instances in n_img.sbp.items():
1696
        needed_mem = 0
1697
        for instance in instances:
1698
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1699
          if bep[constants.BE_AUTO_BALANCE]:
1700
            needed_mem += bep[constants.BE_MEMORY]
1701
        test = n_img.mfree < needed_mem
1702
        self._ErrorIf(test, self.ENODEN1, node,
1703
                      "not enough memory on to accommodate"
1704
                      " failovers should peer node %s fail", prinode)
1705

    
1706
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1707
                       master_files):
1708
    """Verifies and computes the node required file checksums.
1709

1710
    @type ninfo: L{objects.Node}
1711
    @param ninfo: the node to check
1712
    @param nresult: the remote results for the node
1713
    @param file_list: required list of files
1714
    @param local_cksum: dictionary of local files and their checksums
1715
    @param master_files: list of files that only masters should have
1716

1717
    """
1718
    node = ninfo.name
1719
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1720

    
1721
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1722
    test = not isinstance(remote_cksum, dict)
1723
    _ErrorIf(test, self.ENODEFILECHECK, node,
1724
             "node hasn't returned file checksum data")
1725
    if test:
1726
      return
1727

    
1728
    for file_name in file_list:
1729
      node_is_mc = ninfo.master_candidate
1730
      must_have = (file_name not in master_files) or node_is_mc
1731
      # missing
1732
      test1 = file_name not in remote_cksum
1733
      # invalid checksum
1734
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1735
      # existing and good
1736
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
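      # Summary of the cases handled below: a must-have file that is missing
      # or has a wrong checksum is an error; a file that a non-candidate
      # should not have at all is flagged whether its checksum is stale
      # (test2) or current (test3).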
1737
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1738
               "file '%s' missing", file_name)
1739
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1740
               "file '%s' has wrong checksum", file_name)
1741
      # not candidate and this is not a must-have file
1742
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1743
               "file '%s' should not exist on non master"
1744
               " candidates (and the file is outdated)", file_name)
1745
      # all good, except non-master/non-must have combination
1746
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1747
               "file '%s' should not exist"
1748
               " on non master candidates", file_name)
1749

    
1750
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1751
                      drbd_map):
1752
    """Verifies and the node DRBD status.
1753

1754
    @type ninfo: L{objects.Node}
1755
    @param ninfo: the node to check
1756
    @param nresult: the remote results for the node
1757
    @param instanceinfo: the dict of instances
1758
    @param drbd_helper: the configured DRBD usermode helper
1759
    @param drbd_map: the DRBD map as returned by
1760
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1761

1762
    """
1763
    node = ninfo.name
1764
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1765

    
1766
    if drbd_helper:
1767
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1768
      test = (helper_result is None)
1769
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1770
               "no drbd usermode helper returned")
1771
      if helper_result:
1772
        status, payload = helper_result
1773
        test = not status
1774
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1775
                 "drbd usermode helper check unsuccessful: %s", payload)
1776
        test = status and (payload != drbd_helper)
1777
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1778
                 "wrong drbd usermode helper: %s", payload)
1779

    
1780
    # compute the DRBD minors
1781
    node_drbd = {}
1782
    for minor, instance in drbd_map[node].items():
1783
      test = instance not in instanceinfo
1784
      _ErrorIf(test, self.ECLUSTERCFG, None,
1785
               "ghost instance '%s' in temporary DRBD map", instance)
1786
        # ghost instance should not be running, but otherwise we
1787
        # don't give double warnings (both ghost instance and
1788
        # unallocated minor in use)
1789
      if test:
1790
        node_drbd[minor] = (instance, False)
1791
      else:
1792
        instance = instanceinfo[instance]
1793
        node_drbd[minor] = (instance.name, instance.admin_up)
1794

    
1795
    # and now check them
1796
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1797
    test = not isinstance(used_minors, (tuple, list))
1798
    _ErrorIf(test, self.ENODEDRBD, node,
1799
             "cannot parse drbd status file: %s", str(used_minors))
1800
    if test:
1801
      # we cannot check drbd status
1802
      return
1803

    
1804
    for minor, (iname, must_exist) in node_drbd.items():
1805
      test = minor not in used_minors and must_exist
1806
      _ErrorIf(test, self.ENODEDRBD, node,
1807
               "drbd minor %d of instance %s is not active", minor, iname)
1808
    for minor in used_minors:
1809
      test = minor not in node_drbd
1810
      _ErrorIf(test, self.ENODEDRBD, node,
1811
               "unallocated drbd minor %d is in use", minor)
1812

    
1813
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1814
    """Builds the node OS structures.
1815

1816
    @type ninfo: L{objects.Node}
1817
    @param ninfo: the node to check
1818
    @param nresult: the remote results for the node
1819
    @param nimg: the node image object
1820

1821
    """
1822
    node = ninfo.name
1823
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1824

    
1825
    remote_os = nresult.get(constants.NV_OSLIST, None)
1826
    test = (not isinstance(remote_os, list) or
1827
            not compat.all(isinstance(v, list) and len(v) == 7
1828
                           for v in remote_os))
1829

    
1830
    _ErrorIf(test, self.ENODEOS, node,
1831
             "node hasn't returned valid OS data")
1832

    
1833
    nimg.os_fail = test
1834

    
1835
    if test:
1836
      return
1837

    
1838
    os_dict = {}
1839

    
1840
    for (name, os_path, status, diagnose,
1841
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1842

    
1843
      if name not in os_dict:
1844
        os_dict[name] = []
1845

    
1846
      # parameters is a list of lists instead of list of tuples due to
1847
      # JSON lacking a real tuple type, fix it:
1848
      parameters = [tuple(v) for v in parameters]
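      # e.g. [["kernel_path", "/boot/vmlinuz"], ["root_path", "/dev/sda1"]]
      # becomes [("kernel_path", "/boot/vmlinuz"), ("root_path", "/dev/sda1")]
      # (parameter names and values above are purely illustrative)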
1849
      os_dict[name].append((os_path, status, diagnose,
1850
                            set(variants), set(parameters), set(api_ver)))
1851

    
1852
    nimg.oslist = os_dict
1853

    
1854
  def _VerifyNodeOS(self, ninfo, nimg, base):
1855
    """Verifies the node OS list.
1856

1857
    @type ninfo: L{objects.Node}
1858
    @param ninfo: the node to check
1859
    @param nimg: the node image object
1860
    @param base: the 'template' node we match against (e.g. from the master)
1861

1862
    """
1863
    node = ninfo.name
1864
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1865

    
1866
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1867

    
1868
    for os_name, os_data in nimg.oslist.items():
1869
      assert os_data, "Empty OS status for OS %s?!" % os_name
1870
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1871
      _ErrorIf(not f_status, self.ENODEOS, node,
1872
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1873
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1874
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1875
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1876
      # this will be caught in the backend too
1877
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1878
               and not f_var, self.ENODEOS, node,
1879
               "OS %s with API at least %d does not declare any variant",
1880
               os_name, constants.OS_API_V15)
1881
      # comparisons with the 'base' image
1882
      test = os_name not in base.oslist
1883
      _ErrorIf(test, self.ENODEOS, node,
1884
               "Extra OS %s not present on reference node (%s)",
1885
               os_name, base.name)
1886
      if test:
1887
        continue
1888
      assert base.oslist[os_name], "Base node has empty OS status?"
1889
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1890
      if not b_status:
1891
        # base OS is invalid, skipping
1892
        continue
1893
      for kind, a, b in [("API version", f_api, b_api),
1894
                         ("variants list", f_var, b_var),
1895
                         ("parameters", f_param, b_param)]:
1896
        _ErrorIf(a != b, self.ENODEOS, node,
1897
                 "OS %s %s differs from reference node %s: %s vs. %s",
1898
                 kind, os_name, base.name,
1899
                 utils.CommaJoin(a), utils.CommaJoin(b))
1900

    
1901
    # check any missing OSes
1902
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1903
    _ErrorIf(missing, self.ENODEOS, node,
1904
             "OSes present on reference node %s but missing on this node: %s",
1905
             base.name, utils.CommaJoin(missing))
1906

    
1907
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1908
    """Verifies and updates the node volume data.
1909

1910
    This function will update a L{NodeImage}'s internal structures
1911
    with data from the remote call.
1912

1913
    @type ninfo: L{objects.Node}
1914
    @param ninfo: the node to check
1915
    @param nresult: the remote results for the node
1916
    @param nimg: the node image object
1917
    @param vg_name: the configured VG name
1918

1919
    """
1920
    node = ninfo.name
1921
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1922

    
1923
    nimg.lvm_fail = True
1924
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1925
    if vg_name is None:
1926
      pass
1927
    elif isinstance(lvdata, basestring):
1928
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1929
               utils.SafeEncode(lvdata))
1930
    elif not isinstance(lvdata, dict):
1931
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1932
    else:
1933
      nimg.volumes = lvdata
1934
      nimg.lvm_fail = False
1935

    
1936
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1937
    """Verifies and updates the node instance list.
1938

1939
    If the listing was successful, then updates this node's instance
1940
    list. Otherwise, it marks the RPC call as failed for the instance
1941
    list key.
1942

1943
    @type ninfo: L{objects.Node}
1944
    @param ninfo: the node to check
1945
    @param nresult: the remote results for the node
1946
    @param nimg: the node image object
1947

1948
    """
1949
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1950
    test = not isinstance(idata, list)
1951
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1952
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1953
    if test:
1954
      nimg.hyp_fail = True
1955
    else:
1956
      nimg.instances = idata
1957

    
1958
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1959
    """Verifies and computes a node information map
1960

1961
    @type ninfo: L{objects.Node}
1962
    @param ninfo: the node to check
1963
    @param nresult: the remote results for the node
1964
    @param nimg: the node image object
1965
    @param vg_name: the configured VG name
1966

1967
    """
1968
    node = ninfo.name
1969
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1970

    
1971
    # try to read free memory (from the hypervisor)
1972
    hv_info = nresult.get(constants.NV_HVINFO, None)
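    # hv_info is expected to be a dict such as {"memory_free": 2048,
    # "memory_total": 4096}; keys other than "memory_free", and the MiB
    # unit, are assumptions, since only "memory_free" is required below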
1973
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1974
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1975
    if not test:
1976
      try:
1977
        nimg.mfree = int(hv_info["memory_free"])
1978
      except (ValueError, TypeError):
1979
        _ErrorIf(True, self.ENODERPC, node,
1980
                 "node returned invalid nodeinfo, check hypervisor")
1981

    
1982
    # FIXME: devise a free space model for file based instances as well
1983
    if vg_name is not None:
1984
      test = (constants.NV_VGLIST not in nresult or
1985
              vg_name not in nresult[constants.NV_VGLIST])
1986
      _ErrorIf(test, self.ENODELVM, node,
1987
               "node didn't return data for the volume group '%s'"
1988
               " - it is either missing or broken", vg_name)
1989
      if not test:
1990
        try:
1991
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1992
        except (ValueError, TypeError):
1993
          _ErrorIf(True, self.ENODERPC, node,
1994
                   "node returned invalid LVM info, check LVM status")
1995

    
1996
  def BuildHooksEnv(self):
1997
    """Build hooks env.
1998

1999
    Cluster-Verify hooks are run only in the post phase; a hook failure is
2000
    logged in the verify output and makes the verification fail.
2001

2002
    """
2003
    all_nodes = self.cfg.GetNodeList()
2004
    env = {
2005
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2006
      }
2007
    for node in self.cfg.GetAllNodesInfo().values():
2008
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2009

    
2010
    return env, [], all_nodes
2011

    
2012
  def Exec(self, feedback_fn):
2013
    """Verify integrity of cluster, performing various test on nodes.
2014

2015
    """
2016
    self.bad = False
2017
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2018
    verbose = self.op.verbose
2019
    self._feedback_fn = feedback_fn
2020
    feedback_fn("* Verifying global settings")
2021
    for msg in self.cfg.VerifyConfig():
2022
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2023

    
2024
    # Check the cluster certificates
2025
    for cert_filename in constants.ALL_CERT_FILES:
2026
      (errcode, msg) = _VerifyCertificate(cert_filename)
2027
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2028

    
2029
    vg_name = self.cfg.GetVGName()
2030
    drbd_helper = self.cfg.GetDRBDHelper()
2031
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2032
    cluster = self.cfg.GetClusterInfo()
2033
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2034
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2035
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2036
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2037
                        for iname in instancelist)
2038
    i_non_redundant = [] # Non redundant instances
2039
    i_non_a_balanced = [] # Non auto-balanced instances
2040
    n_offline = 0 # Count of offline nodes
2041
    n_drained = 0 # Count of nodes being drained
2042
    node_vol_should = {}
2043

    
2044
    # FIXME: verify OS list
2045
    # do local checksums
2046
    master_files = [constants.CLUSTER_CONF_FILE]
2047
    master_node = self.master_node = self.cfg.GetMasterNode()
2048
    master_ip = self.cfg.GetMasterIP()
2049

    
2050
    file_names = ssconf.SimpleStore().GetFileList()
2051
    file_names.extend(constants.ALL_CERT_FILES)
2052
    file_names.extend(master_files)
2053
    if cluster.modify_etc_hosts:
2054
      file_names.append(constants.ETC_HOSTS)
2055

    
2056
    local_checksums = utils.FingerprintFiles(file_names)
2057

    
2058
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2059
    node_verify_param = {
2060
      constants.NV_FILELIST: file_names,
2061
      constants.NV_NODELIST: [node.name for node in nodeinfo
2062
                              if not node.offline],
2063
      constants.NV_HYPERVISOR: hypervisors,
2064
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2065
                                  node.secondary_ip) for node in nodeinfo
2066
                                 if not node.offline],
2067
      constants.NV_INSTANCELIST: hypervisors,
2068
      constants.NV_VERSION: None,
2069
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2070
      constants.NV_NODESETUP: None,
2071
      constants.NV_TIME: None,
2072
      constants.NV_MASTERIP: (master_node, master_ip),
2073
      constants.NV_OSLIST: None,
2074
      }
2075

    
2076
    if vg_name is not None:
2077
      node_verify_param[constants.NV_VGLIST] = None
2078
      node_verify_param[constants.NV_LVLIST] = vg_name
2079
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2080
      node_verify_param[constants.NV_DRBDLIST] = None
2081

    
2082
    if drbd_helper:
2083
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2084

    
2085
    # Build our expected cluster state
2086
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2087
                                                 name=node.name))
2088
                      for node in nodeinfo)
2089

    
2090
    for instance in instancelist:
2091
      inst_config = instanceinfo[instance]
2092

    
2093
      for nname in inst_config.all_nodes:
2094
        if nname not in node_image:
2095
          # ghost node
2096
          gnode = self.NodeImage(name=nname)
2097
          gnode.ghost = True
2098
          node_image[nname] = gnode
2099

    
2100
      inst_config.MapLVsByNode(node_vol_should)
2101

    
2102
      pnode = inst_config.primary_node
2103
      node_image[pnode].pinst.append(instance)
2104

    
2105
      for snode in inst_config.secondary_nodes:
2106
        nimg = node_image[snode]
2107
        nimg.sinst.append(instance)
2108
        if pnode not in nimg.sbp:
2109
          nimg.sbp[pnode] = []
2110
        nimg.sbp[pnode].append(instance)
2111

    
2112
    # At this point, we have the in-memory data structures complete,
2113
    # except for the runtime information, which we'll gather next
2114

    
2115
    # Due to the way our RPC system works, exact response times cannot be
2116
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2117
    # time before and after executing the request, we can at least have a time
2118
    # window.
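    # Sketch of the resulting tolerance, matching _VerifyNodeTime above:
    #   acceptable node time in [nvinfo_starttime - NODE_MAX_CLOCK_SKEW,
    #                            nvinfo_endtime + NODE_MAX_CLOCK_SKEW]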
2119
    nvinfo_starttime = time.time()
2120
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2121
                                           self.cfg.GetClusterName())
2122
    nvinfo_endtime = time.time()
2123

    
2124
    all_drbd_map = self.cfg.ComputeDRBDMap()
2125

    
2126
    feedback_fn("* Verifying node status")
2127

    
2128
    refos_img = None
2129

    
2130
    for node_i in nodeinfo:
2131
      node = node_i.name
2132
      nimg = node_image[node]
2133

    
2134
      if node_i.offline:
2135
        if verbose:
2136
          feedback_fn("* Skipping offline node %s" % (node,))
2137
        n_offline += 1
2138
        continue
2139

    
2140
      if node == master_node:
2141
        ntype = "master"
2142
      elif node_i.master_candidate:
2143
        ntype = "master candidate"
2144
      elif node_i.drained:
2145
        ntype = "drained"
2146
        n_drained += 1
2147
      else:
2148
        ntype = "regular"
2149
      if verbose:
2150
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2151

    
2152
      msg = all_nvinfo[node].fail_msg
2153
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2154
      if msg:
2155
        nimg.rpc_fail = True
2156
        continue
2157

    
2158
      nresult = all_nvinfo[node].payload
2159

    
2160
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2161
      self._VerifyNodeNetwork(node_i, nresult)
2162
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2163
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2164
                            master_files)
2165
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2166
                           all_drbd_map)
2167
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2168

    
2169
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2170
      self._UpdateNodeInstances(node_i, nresult, nimg)
2171
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2172
      self._UpdateNodeOS(node_i, nresult, nimg)
2173
      if not nimg.os_fail:
2174
        if refos_img is None:
2175
          refos_img = nimg
2176
        self._VerifyNodeOS(node_i, nimg, refos_img)
2177

    
2178
    feedback_fn("* Verifying instance status")
2179
    for instance in instancelist:
2180
      if verbose:
2181
        feedback_fn("* Verifying instance %s" % instance)
2182
      inst_config = instanceinfo[instance]
2183
      self._VerifyInstance(instance, inst_config, node_image)
2184
      inst_nodes_offline = []
2185

    
2186
      pnode = inst_config.primary_node
2187
      pnode_img = node_image[pnode]
2188
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2189
               self.ENODERPC, pnode, "instance %s, connection to"
2190
               " primary node failed", instance)
2191

    
2192
      if pnode_img.offline:
2193
        inst_nodes_offline.append(pnode)
2194

    
2195
      # If the instance is non-redundant we cannot survive losing its primary
2196
      # node, so we are not N+1 compliant. On the other hand we have no disk
2197
      # templates with more than one secondary so that situation is not well
2198
      # supported either.
2199
      # FIXME: does not support file-backed instances
2200
      if not inst_config.secondary_nodes:
2201
        i_non_redundant.append(instance)
2202
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2203
               instance, "instance has multiple secondary nodes: %s",
2204
               utils.CommaJoin(inst_config.secondary_nodes),
2205
               code=self.ETYPE_WARNING)
2206

    
2207
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2208
        i_non_a_balanced.append(instance)
2209

    
2210
      for snode in inst_config.secondary_nodes:
2211
        s_img = node_image[snode]
2212
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2213
                 "instance %s, connection to secondary node failed", instance)
2214

    
2215
        if s_img.offline:
2216
          inst_nodes_offline.append(snode)
2217

    
2218
      # warn that the instance lives on offline nodes
2219
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2220
               "instance lives on offline node(s) %s",
2221
               utils.CommaJoin(inst_nodes_offline))
2222
      # ... or ghost nodes
2223
      for node in inst_config.all_nodes:
2224
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2225
                 "instance lives on ghost node %s", node)
2226

    
2227
    feedback_fn("* Verifying orphan volumes")
2228
    self._VerifyOrphanVolumes(node_vol_should, node_image)
2229

    
2230
    feedback_fn("* Verifying orphan instances")
2231
    self._VerifyOrphanInstances(instancelist, node_image)
2232

    
2233
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2234
      feedback_fn("* Verifying N+1 Memory redundancy")
2235
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2236

    
2237
    feedback_fn("* Other Notes")
2238
    if i_non_redundant:
2239
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2240
                  % len(i_non_redundant))
2241

    
2242
    if i_non_a_balanced:
2243
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2244
                  % len(i_non_a_balanced))
2245

    
2246
    if n_offline:
2247
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2248

    
2249
    if n_drained:
2250
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2251

    
2252
    return not self.bad
2253

    
2254
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2255
    """Analyze the post-hooks' result
2256

2257
    This method analyses the hook result, handles it, and sends some
2258
    nicely-formatted feedback back to the user.
2259

2260
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2261
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2262
    @param hooks_results: the results of the multi-node hooks rpc call
2263
    @param feedback_fn: function used to send feedback back to the caller
2264
    @param lu_result: previous Exec result
2265
    @return: the new Exec result, based on the previous result
2266
        and hook results
2267

2268
    """
2269
    # We only really run POST phase hooks, and are only interested in
2270
    # their results
2271
    if phase == constants.HOOKS_PHASE_POST:
2272
      # Used to change hooks' output to proper indentation
2273
      indent_re = re.compile('^', re.M)
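      # (with re.M, '^' matches at the start of every line, so e.g.
      # indent_re.sub('      ', "line1\nline2") == "      line1\n      line2")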
2274
      feedback_fn("* Hooks Results")
2275
      assert hooks_results, "invalid result from hooks"
2276

    
2277
      for node_name in hooks_results:
2278
        res = hooks_results[node_name]
2279
        msg = res.fail_msg
2280
        test = msg and not res.offline
2281
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2282
                      "Communication failure in hooks execution: %s", msg)
2283
        if res.offline or msg:
2284
          # No need to investigate payload if node is offline or gave an error.
2285
          # override manually lu_result here as _ErrorIf only
2286
          # overrides self.bad
2287
          lu_result = 1
2288
          continue
2289
        for script, hkr, output in res.payload:
2290
          test = hkr == constants.HKR_FAIL
2291
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2292
                        "Script %s failed, output:", script)
2293
          if test:
2294
            output = indent_re.sub('      ', output)
2295
            feedback_fn("%s" % output)
2296
            lu_result = 0
2297

    
2298
      return lu_result
2299

    
2300

    
2301
class LUVerifyDisks(NoHooksLU):
2302
  """Verifies the cluster disks status.
2303

2304
  """
2305
  REQ_BGL = False
2306

    
2307
  def ExpandNames(self):
2308
    self.needed_locks = {
2309
      locking.LEVEL_NODE: locking.ALL_SET,
2310
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2311
    }
2312
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2313

    
2314
  def Exec(self, feedback_fn):
2315
    """Verify integrity of cluster disks.
2316

2317
    @rtype: tuple of three items
2318
    @return: a tuple of (dict of node-to-node_error, list of instances
2319
        which need activate-disks, dict of instance: (node, volume) for
2320
        missing volumes)
2321

2322
    """
2323
    result = res_nodes, res_instances, res_missing = {}, [], {}
2324

    
2325
    vg_name = self.cfg.GetVGName()
2326
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2327
    instances = [self.cfg.GetInstanceInfo(name)
2328
                 for name in self.cfg.GetInstanceList()]
2329

    
2330
    nv_dict = {}
2331
    for inst in instances:
2332
      inst_lvs = {}
2333
      if (not inst.admin_up or
2334
          inst.disk_template not in constants.DTS_NET_MIRROR):
2335
        continue
2336
      inst.MapLVsByNode(inst_lvs)
2337
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
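      # e.g. {"inst1": {"node1": ["lv-a", "lv-b"]}} ends up contributing
      # {("node1", "lv-a"): inst1, ("node1", "lv-b"): inst1} to nv_dict
      # (instance, node and LV names are illustrative)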
2338
      for node, vol_list in inst_lvs.iteritems():
2339
        for vol in vol_list:
2340
          nv_dict[(node, vol)] = inst
2341

    
2342
    if not nv_dict:
2343
      return result
2344

    
2345
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2346

    
2347
    for node in nodes:
2348
      # node_volume
2349
      node_res = node_lvs[node]
2350
      if node_res.offline:
2351
        continue
2352
      msg = node_res.fail_msg
2353
      if msg:
2354
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2355
        res_nodes[node] = msg
2356
        continue
2357

    
2358
      lvs = node_res.payload
2359
      for lv_name, (_, _, lv_online) in lvs.items():
2360
        inst = nv_dict.pop((node, lv_name), None)
2361
        if (not lv_online and inst is not None
2362
            and inst.name not in res_instances):
2363
          res_instances.append(inst.name)
2364

    
2365
    # any leftover items in nv_dict are missing LVs, let's arrange the
2366
    # data better
2367
    for key, inst in nv_dict.iteritems():
2368
      if inst.name not in res_missing:
2369
        res_missing[inst.name] = []
2370
      res_missing[inst.name].append(key)
2371

    
2372
    return result
2373

    
2374

    
2375
class LURepairDiskSizes(NoHooksLU):
2376
  """Verifies the cluster disks sizes.
2377

2378
  """
2379
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2380
  REQ_BGL = False
2381

    
2382
  def ExpandNames(self):
2383
    if self.op.instances:
2384
      self.wanted_names = []
2385
      for name in self.op.instances:
2386
        full_name = _ExpandInstanceName(self.cfg, name)
2387
        self.wanted_names.append(full_name)
2388
      self.needed_locks = {
2389
        locking.LEVEL_NODE: [],
2390
        locking.LEVEL_INSTANCE: self.wanted_names,
2391
        }
2392
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2393
    else:
2394
      self.wanted_names = None
2395
      self.needed_locks = {
2396
        locking.LEVEL_NODE: locking.ALL_SET,
2397
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2398
        }
2399
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2400

    
2401
  def DeclareLocks(self, level):
2402
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2403
      self._LockInstancesNodes(primary_only=True)
2404

    
2405
  def CheckPrereq(self):
2406
    """Check prerequisites.
2407

2408
    This only checks the optional instance list against the existing names.
2409

2410
    """
2411
    if self.wanted_names is None:
2412
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2413

    
2414
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2415
                             in self.wanted_names]
2416

    
2417
  def _EnsureChildSizes(self, disk):
2418
    """Ensure children of the disk have the needed disk size.
2419

2420
    This is valid mainly for DRBD8 and fixes an issue where the
2421
    children have a smaller disk size.
2422

2423
    @param disk: an L{ganeti.objects.Disk} object
2424

2425
    """
2426
    if disk.dev_type == constants.LD_DRBD8:
2427
      assert disk.children, "Empty children for DRBD8?"
2428
      fchild = disk.children[0]
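      # children[0] is treated as the data child here; the other child (the
      # DRBD metadata device) is deliberately not resized (see the "metadev"
      # note below)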
2429
      mismatch = fchild.size < disk.size
2430
      if mismatch:
2431
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2432
                     fchild.size, disk.size)
2433
        fchild.size = disk.size
2434

    
2435
      # and we recurse on this child only, not on the metadev
2436
      return self._EnsureChildSizes(fchild) or mismatch
2437
    else:
2438
      return False
2439

    
2440
  def Exec(self, feedback_fn):
2441
    """Verify the size of cluster disks.
2442

2443
    """
2444
    # TODO: check child disks too
2445
    # TODO: check differences in size between primary/secondary nodes
2446
    per_node_disks = {}
2447
    for instance in self.wanted_instances:
2448
      pnode = instance.primary_node
2449
      if pnode not in per_node_disks:
2450
        per_node_disks[pnode] = []
2451
      for idx, disk in enumerate(instance.disks):
2452
        per_node_disks[pnode].append((instance, idx, disk))
2453

    
2454
    changed = []
2455
    for node, dskl in per_node_disks.items():
2456
      newl = [v[2].Copy() for v in dskl]
2457
      for dsk in newl:
2458
        self.cfg.SetDiskID(dsk, node)
2459
      result = self.rpc.call_blockdev_getsizes(node, newl)
2460
      if result.fail_msg:
2461
        self.LogWarning("Failure in blockdev_getsizes call to node"
2462
                        " %s, ignoring", node)
2463
        continue
2464
      if len(result.data) != len(dskl):
2465
        self.LogWarning("Invalid result from node %s, ignoring node results",
2466
                        node)
2467
        continue
2468
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2469
        if size is None:
2470
          self.LogWarning("Disk %d of instance %s did not return size"
2471
                          " information, ignoring", idx, instance.name)
2472
          continue
2473
        if not isinstance(size, (int, long)):
2474
          self.LogWarning("Disk %d of instance %s did not return valid"
2475
                          " size information, ignoring", idx, instance.name)
2476
          continue
2477
        size = size >> 20
2478
        if size != disk.size:
2479
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2480
                       " correcting: recorded %d, actual %d", idx,
2481
                       instance.name, disk.size, size)
2482
          disk.size = size
2483
          self.cfg.Update(instance, feedback_fn)
2484
          changed.append((instance.name, idx, size))
2485
        if self._EnsureChildSizes(disk):
2486
          self.cfg.Update(instance, feedback_fn)
2487
          changed.append((instance.name, idx, disk.size))
2488
    return changed
2489

    
2490

    
2491
class LURenameCluster(LogicalUnit):
2492
  """Rename the cluster.
2493

2494
  """
2495
  HPATH = "cluster-rename"
2496
  HTYPE = constants.HTYPE_CLUSTER
2497
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2498

    
2499
  def BuildHooksEnv(self):
2500
    """Build hooks env.
2501

2502
    """
2503
    env = {
2504
      "OP_TARGET": self.cfg.GetClusterName(),
2505
      "NEW_NAME": self.op.name,
2506
      }
2507
    mn = self.cfg.GetMasterNode()
2508
    all_nodes = self.cfg.GetNodeList()
2509
    return env, [mn], all_nodes
2510

    
2511
  def CheckPrereq(self):
2512
    """Verify that the passed name is a valid one.
2513

2514
    """
2515
    hostname = netutils.GetHostInfo(self.op.name)
2516

    
2517
    new_name = hostname.name
2518
    self.ip = new_ip = hostname.ip
2519
    old_name = self.cfg.GetClusterName()
2520
    old_ip = self.cfg.GetMasterIP()
2521
    if new_name == old_name and new_ip == old_ip:
2522
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2523
                                 " cluster has changed",
2524
                                 errors.ECODE_INVAL)
2525
    if new_ip != old_ip:
2526
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2527
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2528
                                   " reachable on the network. Aborting." %
2529
                                   new_ip, errors.ECODE_NOTUNIQUE)
2530

    
2531
    self.op.name = new_name
2532

    
2533
  def Exec(self, feedback_fn):
2534
    """Rename the cluster.
2535

2536
    """
2537
    clustername = self.op.name
2538
    ip = self.ip
2539

    
2540
    # shutdown the master IP
2541
    master = self.cfg.GetMasterNode()
2542
    result = self.rpc.call_node_stop_master(master, False)
2543
    result.Raise("Could not disable the master role")
2544

    
2545
    try:
2546
      cluster = self.cfg.GetClusterInfo()
2547
      cluster.cluster_name = clustername
2548
      cluster.master_ip = ip
2549
      self.cfg.Update(cluster, feedback_fn)
2550

    
2551
      # update the known hosts file
2552
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2553
      node_list = self.cfg.GetNodeList()
2554
      try:
2555
        node_list.remove(master)
2556
      except ValueError:
2557
        pass
2558
      result = self.rpc.call_upload_file(node_list,
2559
                                         constants.SSH_KNOWN_HOSTS_FILE)
2560
      for to_node, to_result in result.iteritems():
2561
        msg = to_result.fail_msg
2562
        if msg:
2563
          msg = ("Copy of file %s to node %s failed: %s" %
2564
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2565
          self.proc.LogWarning(msg)
2566

    
2567
    finally:
2568
      result = self.rpc.call_node_start_master(master, False, False)
2569
      msg = result.fail_msg
2570
      if msg:
2571
        self.LogWarning("Could not re-enable the master role on"
2572
                        " the master, please restart manually: %s", msg)
2573

    
2574

    
2575
class LUSetClusterParams(LogicalUnit):
2576
  """Change the parameters of the cluster.
2577

2578
  """
2579
  HPATH = "cluster-modify"
2580
  HTYPE = constants.HTYPE_CLUSTER
2581
  _OP_PARAMS = [
2582
    ("vg_name", None, _TMaybeString),
2583
    ("enabled_hypervisors", None,
2584
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2585
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2586
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2587
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2588
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2589
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2590
    ("uid_pool", None, _NoType),
2591
    ("add_uids", None, _NoType),
2592
    ("remove_uids", None, _NoType),
2593
    ("maintain_node_health", None, _TMaybeBool),
2594
    ("nicparams", None, _TOr(_TDict, _TNone)),
2595
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2596
    ("default_iallocator", None, _TMaybeString),
2597
    ]
2598
  REQ_BGL = False
2599

    
2600
  def CheckArguments(self):
2601
    """Check parameters
2602

2603
    """
2604
    if self.op.uid_pool:
2605
      uidpool.CheckUidPool(self.op.uid_pool)
2606

    
2607
    if self.op.add_uids:
2608
      uidpool.CheckUidPool(self.op.add_uids)
2609

    
2610
    if self.op.remove_uids:
2611
      uidpool.CheckUidPool(self.op.remove_uids)
2612

    
2613
  def ExpandNames(self):
2614
    # FIXME: in the future maybe other cluster params won't require checking on
2615
    # all nodes to be modified.
2616
    self.needed_locks = {
2617
      locking.LEVEL_NODE: locking.ALL_SET,
2618
    }
2619
    self.share_locks[locking.LEVEL_NODE] = 1
2620

    
2621
  def BuildHooksEnv(self):
2622
    """Build hooks env.
2623

2624
    """
2625
    env = {
2626
      "OP_TARGET": self.cfg.GetClusterName(),
2627
      "NEW_VG_NAME": self.op.vg_name,
2628
      }
2629
    mn = self.cfg.GetMasterNode()
2630
    return env, [mn], [mn]
2631

    
2632
  def CheckPrereq(self):
2633
    """Check prerequisites.
2634

2635
    This checks whether the given params don't conflict and
2636
    if the given volume group is valid.
2637

2638
    """
2639
    if self.op.vg_name is not None and not self.op.vg_name:
2640
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2641
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2642
                                   " instances exist", errors.ECODE_INVAL)
2643

    
2644
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2645
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2646
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2647
                                   " drbd-based instances exist",
2648
                                   errors.ECODE_INVAL)
2649

    
2650
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2651

    
2652
    # if vg_name not None, checks given volume group on all nodes
2653
    if self.op.vg_name:
2654
      vglist = self.rpc.call_vg_list(node_list)
2655
      for node in node_list:
2656
        msg = vglist[node].fail_msg
2657
        if msg:
2658
          # ignoring down node
2659
          self.LogWarning("Error while gathering data on node %s"
2660
                          " (ignoring node): %s", node, msg)
2661
          continue
2662
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2663
                                              self.op.vg_name,
2664
                                              constants.MIN_VG_SIZE)
2665
        if vgstatus:
2666
          raise errors.OpPrereqError("Error on node '%s': %s" %
2667
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2668

    
2669
    if self.op.drbd_helper:
2670
      # checks given drbd helper on all nodes
2671
      helpers = self.rpc.call_drbd_helper(node_list)
2672
      for node in node_list:
2673
        ninfo = self.cfg.GetNodeInfo(node)
2674
        if ninfo.offline:
2675
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2676
          continue
2677
        msg = helpers[node].fail_msg
2678
        if msg:
2679
          raise errors.OpPrereqError("Error checking drbd helper on node"
2680
                                     " '%s': %s" % (node, msg),
2681
                                     errors.ECODE_ENVIRON)
2682
        node_helper = helpers[node].payload
2683
        if node_helper != self.op.drbd_helper:
2684
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2685
                                     (node, node_helper), errors.ECODE_ENVIRON)
2686

    
2687
    self.cluster = cluster = self.cfg.GetClusterInfo()
2688
    # validate params changes
2689
    if self.op.beparams:
2690
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2691
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2692

    
2693
    if self.op.nicparams:
2694
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2695
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2696
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2697
      nic_errors = []
2698

    
2699
      # check all instances for consistency
2700
      for instance in self.cfg.GetAllInstancesInfo().values():
2701
        for nic_idx, nic in enumerate(instance.nics):
2702
          params_copy = copy.deepcopy(nic.nicparams)
2703
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2704

    
2705
          # check parameter syntax
2706
          try:
2707
            objects.NIC.CheckParameterSyntax(params_filled)
2708
          except errors.ConfigurationError, err:
2709
            nic_errors.append("Instance %s, nic/%d: %s" %
2710
                              (instance.name, nic_idx, err))
2711

    
2712
          # if we're moving instances to routed, check that they have an ip
2713
          target_mode = params_filled[constants.NIC_MODE]
2714
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2715
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2716
                              (instance.name, nic_idx))
2717
      if nic_errors:
2718
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2719
                                   "\n".join(nic_errors))
2720

    
2721
    # hypervisor list/parameters
2722
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2723
    if self.op.hvparams:
2724
      for hv_name, hv_dict in self.op.hvparams.items():
2725
        if hv_name not in self.new_hvparams:
2726
          self.new_hvparams[hv_name] = hv_dict
2727
        else:
2728
          self.new_hvparams[hv_name].update(hv_dict)
2729

    
2730
    # os hypervisor parameters
2731
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2732
    if self.op.os_hvp:
2733
      for os_name, hvs in self.op.os_hvp.items():
2734
        if os_name not in self.new_os_hvp:
2735
          self.new_os_hvp[os_name] = hvs
2736
        else:
2737
          for hv_name, hv_dict in hvs.items():
2738
            if hv_name not in self.new_os_hvp[os_name]:
2739
              self.new_os_hvp[os_name][hv_name] = hv_dict
2740
            else:
2741
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2742

    
2743
    # os parameters
2744
    self.new_osp = objects.FillDict(cluster.osparams, {})
2745
    if self.op.osparams:
2746
      for os_name, osp in self.op.osparams.items():
2747
        if os_name not in self.new_osp:
2748
          self.new_osp[os_name] = {}
2749

    
2750
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2751
                                                  use_none=True)
2752

    
2753
        if not self.new_osp[os_name]:
2754
          # we removed all parameters
2755
          del self.new_osp[os_name]
2756
        else:
2757
          # check the parameter validity (remote check)
2758
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2759
                         os_name, self.new_osp[os_name])
2760

    
2761
    # changes to the hypervisor list
2762
    if self.op.enabled_hypervisors is not None:
2763
      self.hv_list = self.op.enabled_hypervisors
2764
      for hv in self.hv_list:
2765
        # if the hypervisor doesn't already exist in the cluster
2766
        # hvparams, we initialize it to empty, and then (in both
2767
        # cases) we make sure to fill the defaults, as we might not
2768
        # have a complete defaults list if the hypervisor wasn't
2769
        # enabled before
2770
        if hv not in new_hvp:
2771
          new_hvp[hv] = {}
2772
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2773
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2774
    else:
2775
      self.hv_list = cluster.enabled_hypervisors
2776

    
2777
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2778
      # either the enabled list has changed, or the parameters have, validate
2779
      for hv_name, hv_params in self.new_hvparams.items():
2780
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2781
            (self.op.enabled_hypervisors and
2782
             hv_name in self.op.enabled_hypervisors)):
2783
          # either this is a new hypervisor, or its parameters have changed
2784
          hv_class = hypervisor.GetHypervisor(hv_name)
2785
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2786
          hv_class.CheckParameterSyntax(hv_params)
2787
          _CheckHVParams(self, node_list, hv_name, hv_params)
2788

    
2789
    if self.op.os_hvp:
2790
      # no need to check any newly-enabled hypervisors, since the
2791
      # defaults have already been checked in the above code-block
2792
      for os_name, os_hvp in self.new_os_hvp.items():
2793
        for hv_name, hv_params in os_hvp.items():
2794
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2795
          # we need to fill in the new os_hvp on top of the actual hv_p
2796
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2797
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2798
          hv_class = hypervisor.GetHypervisor(hv_name)
2799
          hv_class.CheckParameterSyntax(new_osp)
2800
          _CheckHVParams(self, node_list, hv_name, new_osp)
2801

    
2802
    if self.op.default_iallocator:
2803
      alloc_script = utils.FindFile(self.op.default_iallocator,
2804
                                    constants.IALLOCATOR_SEARCH_PATH,
2805
                                    os.path.isfile)
2806
      if alloc_script is None:
2807
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2808
                                   " specified" % self.op.default_iallocator,
2809
                                   errors.ECODE_INVAL)
2810

    
2811
  def Exec(self, feedback_fn):
2812
    """Change the parameters of the cluster.
2813

2814
    """
2815
    if self.op.vg_name is not None:
2816
      new_volume = self.op.vg_name
2817
      if not new_volume:
2818
        new_volume = None
2819
      if new_volume != self.cfg.GetVGName():
2820
        self.cfg.SetVGName(new_volume)
2821
      else:
2822
        feedback_fn("Cluster LVM configuration already in desired"
2823
                    " state, not changing")
2824
    if self.op.drbd_helper is not None:
2825
      new_helper = self.op.drbd_helper
2826
      if not new_helper:
2827
        new_helper = None
2828
      if new_helper != self.cfg.GetDRBDHelper():
2829
        self.cfg.SetDRBDHelper(new_helper)
2830
      else:
2831
        feedback_fn("Cluster DRBD helper already in desired state,"
2832
                    " not changing")
2833
    if self.op.hvparams:
2834
      self.cluster.hvparams = self.new_hvparams
2835
    if self.op.os_hvp:
2836
      self.cluster.os_hvp = self.new_os_hvp
2837
    if self.op.enabled_hypervisors is not None:
2838
      self.cluster.hvparams = self.new_hvparams
2839
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2840
    if self.op.beparams:
2841
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2842
    if self.op.nicparams:
2843
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2844
    if self.op.osparams:
2845
      self.cluster.osparams = self.new_osp
2846

    
2847
    if self.op.candidate_pool_size is not None:
2848
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2849
      # we need to update the pool size here, otherwise the save will fail
2850
      _AdjustCandidatePool(self, [])
2851

    
2852
    if self.op.maintain_node_health is not None:
2853
      self.cluster.maintain_node_health = self.op.maintain_node_health
2854

    
2855
    if self.op.add_uids is not None:
2856
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2857

    
2858
    if self.op.remove_uids is not None:
2859
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2860

    
2861
    if self.op.uid_pool is not None:
2862
      self.cluster.uid_pool = self.op.uid_pool
2863

    
2864
    if self.op.default_iallocator is not None:
2865
      self.cluster.default_iallocator = self.op.default_iallocator
2866

    
2867
    self.cfg.Update(self.cluster, feedback_fn)
2868

    
2869

    
2870
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


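# Illustrative usage sketch (added comment, not part of the original flow):
# callers either invoke _RedistributeAncillaryFiles with no extra arguments,
# as LURedistributeConfig below does, or pass nodes that are not yet in the
# configuration, as done when adding a node; the node name here is
# hypothetical.
#
#   _RedistributeAncillaryFiles(self, additional_nodes=["node4.example.com"])

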
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


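# Illustrative usage sketch (added comment): an LU that has just created or
# activated mirrored disks would typically block on _WaitForSync and treat a
# degraded result as fatal; the instance object is assumed to be already
# locked and loaded by the calling LU, and the message text is hypothetical.
#
#   disk_abort = not _WaitForSync(self, self.instance)
#   if disk_abort:
#     raise errors.OpExecError("There are some degraded disks for"
#                              " this instance")

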
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


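# Illustrative usage sketch (added comment): a caller that cares about the
# health of the local storage backing a DRBD device on a secondary node
# (rather than the overall mirror state) would pass ldisk=True; "node_name"
# and the error message below are hypothetical.
#
#   if not _CheckDiskConsistency(self, dev, node_name, False, ldisk=True):
#     raise errors.OpExecError("Device %s is degraded on node %s" %
#                              (dev.iv_name, node_name))

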
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


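# Illustrative note (added comment, hypothetical values): the "keep
# consistency" branch above intersects the per-node data, so an OS is only
# reported with what every node agrees on.  For example, if node1 reports
# variants ["lenny", "squeeze"] and node2 reports ["squeeze", "wheezy"], the
# OS ends up with variants ["squeeze"]; and if any node reports the OS as
# invalid, variants/parameters/api_versions are reset to empty sets.

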
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


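# Illustrative note (added comment, hypothetical values): for
# output_fields=["name", "role", "pinst_cnt"], each row produced by the loop
# above would look like ["node1.example.com", "M", 2] for the master node,
# with "C"/"D"/"O"/"R" standing for master candidate, drained, offline and
# regular nodes respectively.

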
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("name", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("changes", _NoDefault, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


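# Illustrative usage note (added comment, values are assumptions): for LVM
# physical volumes the modifiable field is typically the "allocatable" flag,
# so an opcode driving this LU would carry something along the lines of
# changes={constants.SF_ALLOCATABLE: False} to mark a PV as unallocatable;
# any field outside MODIFIABLE_STORAGE_FIELDS is rejected in CheckArguments.

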
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, _NoType),
    ("secondary_ip", None, _TMaybeString),
    ("readd", False, _TBool),
    ]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = netutils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      self.op.secondary_ip = primary_ip
    if not netutils.IsValidIP4(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


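# Illustrative note (added comment, hypothetical host names and addresses):
# for a dual-homed node being added, BuildHooksEnv above would produce an
# environment along the lines of
#
#   {"OP_TARGET": "node4.example.com", "NODE_NAME": "node4.example.com",
#    "NODE_PIP": "192.0.2.14", "NODE_SIP": "198.51.100.14"}
#
# with the hooks run on all existing nodes before the add, plus the new node
# afterwards.

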
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, _TMaybeBool),
    ("offline", None, _TMaybeBool),
    ("drained", None, _TMaybeBool),
    ("auto_promote", False, _TBool),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


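# Illustrative note (added comment, derived from the Exec method above):
# offlining a node that is currently a master candidate yields a result list
# such as
#
#   [("offline", "True"),
#    ("master_candidate", "auto-demotion due to offline")]
#
# which the job system then reports back as the list of changed parameters.

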
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (status, device_info), where status is False if
      the operation failed and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


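# Illustrative note (added comment, hypothetical values): a successful call
# for a single-disk DRBD instance would return something like
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. the primary node, the instance-visible disk name and the node-level
# device path that will be handed to the hypervisor.

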
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
4320
  """Shutdown an instance's disks.
4321

4322
  """
4323
  _OP_PARAMS = [
4324
    _PInstanceName,
4325
    ]
4326
  REQ_BGL = False
4327

    
4328
  def ExpandNames(self):
4329
    self._ExpandAndLockInstance()
4330
    self.needed_locks[locking.LEVEL_NODE] = []
4331
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4332

    
4333
  def DeclareLocks(self, level):
4334
    if level == locking.LEVEL_NODE:
4335
      self._LockInstancesNodes()
4336

    
4337
  def CheckPrereq(self):
4338
    """Check prerequisites.
4339

4340
    This checks that the instance is in the cluster.
4341

4342
    """
4343
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4344
    assert self.instance is not None, \
4345
      "Cannot retrieve locked instance %s" % self.op.instance_name
4346

    
4347
  def Exec(self, feedback_fn):
4348
    """Deactivate the disks
4349

4350
    """
4351
    instance = self.instance
4352
    _SafeShutdownInstanceDisks(self, instance)
4353

    
4354

    
4355
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4356
  """Shutdown block devices of an instance.
4357

4358
  This function checks if an instance is running, before calling
4359
  _ShutdownInstanceDisks.
4360

4361
  """
4362
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4363
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4364

    
4365

    
4366
def _ExpandCheckDisks(instance, disks):
4367
  """Return the instance disks selected by the disks list
4368

4369
  @type disks: list of L{objects.Disk} or None
4370
  @param disks: selected disks
4371
  @rtype: list of L{objects.Disk}
4372
  @return: selected instance disks to act on
4373

4374
  """
4375
  if disks is None:
4376
    return instance.disks
4377
  else:
4378
    if not set(disks).issubset(instance.disks):
4379
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4380
                                   " target instance")
4381
    return disks
4382

    
4383

    
4384
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  errors on any other node always result in a False return value.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
4407

    
4408

    
4409
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4410
  """Checks if a node has enough free memory.
4411

4412
  This function check if a given node has the needed amount of free
4413
  memory. In case the node has less memory or we cannot get the
4414
  information from the node, this function raise an OpPrereqError
4415
  exception.
4416

4417
  @type lu: C{LogicalUnit}
4418
  @param lu: a logical unit from which we get configuration data
4419
  @type node: C{str}
4420
  @param node: the node to check
4421
  @type reason: C{str}
4422
  @param reason: string to use in the error message
4423
  @type requested: C{int}
4424
  @param requested: the amount of memory in MiB to check for
4425
  @type hypervisor_name: C{str}
4426
  @param hypervisor_name: the hypervisor to ask for memory stats
4427
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4428
      we cannot check the node
4429

4430
  """
4431
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
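  # the RPC payload carries the node's memory statistics; "memory_free" is
  # expected to be an integer number of MiB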
4432
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4433
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4434
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4435
  if not isinstance(free_mem, int):
4436
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4437
                               " was '%s'" % (node, free_mem),
4438
                               errors.ECODE_ENVIRON)
4439
  if requested > free_mem:
4440
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4441
                               " needed %s MiB, available %s MiB" %
4442
                               (node, reason, requested, free_mem),
4443
                               errors.ECODE_NORES)
4444

    
4445

    
4446
def _CheckNodesFreeDisk(lu, nodenames, requested):
4447
  """Checks if nodes have enough free disk space in the default VG.
4448

4449
  This function check if all given nodes have the needed amount of
4450
  free disk. In case any node has less disk or we cannot get the
4451
  information from the node, this function raise an OpPrereqError
4452
  exception.
4453

4454
  @type lu: C{LogicalUnit}
4455
  @param lu: a logical unit from which we get configuration data
4456
  @type nodenames: C{list}
4457
  @param nodenames: the list of node names to check
4458
  @type requested: C{int}
4459
  @param requested: the amount of disk in MiB to check for
4460
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4461
      we cannot check the node
4462

4463
  """
4464
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4465
                                   lu.cfg.GetHypervisorType())
4466
  for node in nodenames:
4467
    info = nodeinfo[node]
4468
    info.Raise("Cannot get current information from node %s" % node,
4469
               prereq=True, ecode=errors.ECODE_ENVIRON)
4470
    vg_free = info.payload.get("vg_free", None)
4471
    if not isinstance(vg_free, int):
4472
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4473
                                 " result was '%s'" % (node, vg_free),
4474
                                 errors.ECODE_ENVIRON)
4475
    if requested > vg_free:
4476
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4477
                                 " required %d MiB, available %d MiB" %
4478
                                 (node, requested, vg_free),
4479
                                 errors.ECODE_NORES)
4480

    
4481

    
4482
class LUStartupInstance(LogicalUnit):
4483
  """Starts an instance.
4484

4485
  """
4486
  HPATH = "instance-start"
4487
  HTYPE = constants.HTYPE_INSTANCE
4488
  _OP_PARAMS = [
4489
    _PInstanceName,
4490
    _PForce,
4491
    ("hvparams", _EmptyDict, _TDict),
4492
    ("beparams", _EmptyDict, _TDict),
4493
    ]
4494
  REQ_BGL = False
4495

    
4496
  def CheckArguments(self):
4497
    # extra beparams
4498
    if self.op.beparams:
4499
      # fill the beparams dict
4500
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4501

    
4502
  def ExpandNames(self):
4503
    self._ExpandAndLockInstance()
4504

    
4505
  def BuildHooksEnv(self):
4506
    """Build hooks env.
4507

4508
    This runs on master, primary and secondary nodes of the instance.
4509

4510
    """
4511
    env = {
4512
      "FORCE": self.op.force,
4513
      }
4514
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4515
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4516
    return env, nl, nl
4517

    
4518
  def CheckPrereq(self):
4519
    """Check prerequisites.
4520

4521
    This checks that the instance is in the cluster.
4522

4523
    """
4524
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4525
    assert self.instance is not None, \
4526
      "Cannot retrieve locked instance %s" % self.op.instance_name
4527

    
4528
    # extra hvparams
4529
    if self.op.hvparams:
4530
      # check hypervisor parameter syntax (locally)
4531
      cluster = self.cfg.GetClusterInfo()
4532
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4533
      filled_hvp = cluster.FillHV(instance)
4534
      filled_hvp.update(self.op.hvparams)
4535
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4536
      hv_type.CheckParameterSyntax(filled_hvp)
4537
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
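      # the merged parameters are only used for validation; Exec passes
      # self.op.hvparams verbatim to the node and the temporary values are
      # never saved to the cluster configuration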
4538

    
4539
    _CheckNodeOnline(self, instance.primary_node)
4540

    
4541
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4542
    # check bridges existence
4543
    _CheckInstanceBridgesExist(self, instance)
4544

    
4545
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4546
                                              instance.name,
4547
                                              instance.hypervisor)
4548
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4549
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4550
    if not remote_info.payload: # not running already
4551
      _CheckNodeFreeMemory(self, instance.primary_node,
4552
                           "starting instance %s" % instance.name,
4553
                           bep[constants.BE_MEMORY], instance.hypervisor)
4554

    
4555
  def Exec(self, feedback_fn):
4556
    """Start the instance.
4557

4558
    """
4559
    instance = self.instance
4560
    force = self.op.force
4561

    
4562
    self.cfg.MarkInstanceUp(instance.name)
4563

    
4564
    node_current = instance.primary_node
4565

    
4566
    _StartInstanceDisks(self, instance, force)
4567

    
4568
    result = self.rpc.call_instance_start(node_current, instance,
4569
                                          self.op.hvparams, self.op.beparams)
4570
    msg = result.fail_msg
4571
    if msg:
4572
      _ShutdownInstanceDisks(self, instance)
4573
      raise errors.OpExecError("Could not start instance: %s" % msg)
4574

    
4575

    
4576
class LURebootInstance(LogicalUnit):
4577
  """Reboot an instance.
4578

4579
  """
4580
  HPATH = "instance-reboot"
4581
  HTYPE = constants.HTYPE_INSTANCE
4582
  _OP_PARAMS = [
4583
    _PInstanceName,
4584
    ("ignore_secondaries", False, _TBool),
4585
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4586
    _PShutdownTimeout,
4587
    ]
4588
  REQ_BGL = False
4589

    
4590
  def ExpandNames(self):
4591
    self._ExpandAndLockInstance()
4592

    
4593
  def BuildHooksEnv(self):
4594
    """Build hooks env.
4595

4596
    This runs on master, primary and secondary nodes of the instance.
4597

4598
    """
4599
    env = {
4600
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4601
      "REBOOT_TYPE": self.op.reboot_type,
4602
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4603
      }
4604
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4605
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4606
    return env, nl, nl
4607

    
4608
  def CheckPrereq(self):
4609
    """Check prerequisites.
4610

4611
    This checks that the instance is in the cluster.
4612

4613
    """
4614
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4615
    assert self.instance is not None, \
4616
      "Cannot retrieve locked instance %s" % self.op.instance_name
4617

    
4618
    _CheckNodeOnline(self, instance.primary_node)
4619

    
4620
    # check bridges existence
4621
    _CheckInstanceBridgesExist(self, instance)
4622

    
4623
  def Exec(self, feedback_fn):
4624
    """Reboot the instance.
4625

4626
    """
4627
    instance = self.instance
4628
    ignore_secondaries = self.op.ignore_secondaries
4629
    reboot_type = self.op.reboot_type
4630

    
4631
    node_current = instance.primary_node
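    # soft and hard reboots are delegated to the hypervisor on the primary
    # node; a full reboot is emulated below by a shutdown/start cycle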
4632

    
4633
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4634
                       constants.INSTANCE_REBOOT_HARD]:
4635
      for disk in instance.disks:
4636
        self.cfg.SetDiskID(disk, node_current)
4637
      result = self.rpc.call_instance_reboot(node_current, instance,
4638
                                             reboot_type,
4639
                                             self.op.shutdown_timeout)
4640
      result.Raise("Could not reboot instance")
4641
    else:
4642
      result = self.rpc.call_instance_shutdown(node_current, instance,
4643
                                               self.op.shutdown_timeout)
4644
      result.Raise("Could not shutdown instance for full reboot")
4645
      _ShutdownInstanceDisks(self, instance)
4646
      _StartInstanceDisks(self, instance, ignore_secondaries)
4647
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4648
      msg = result.fail_msg
4649
      if msg:
4650
        _ShutdownInstanceDisks(self, instance)
4651
        raise errors.OpExecError("Could not start instance for"
4652
                                 " full reboot: %s" % msg)
4653

    
4654
    self.cfg.MarkInstanceUp(instance.name)
4655

    
4656

    
4657
class LUShutdownInstance(LogicalUnit):
4658
  """Shutdown an instance.
4659

4660
  """
4661
  HPATH = "instance-stop"
4662
  HTYPE = constants.HTYPE_INSTANCE
4663
  _OP_PARAMS = [
4664
    _PInstanceName,
4665
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4666
    ]
4667
  REQ_BGL = False
4668

    
4669
  def ExpandNames(self):
4670
    self._ExpandAndLockInstance()
4671

    
4672
  def BuildHooksEnv(self):
4673
    """Build hooks env.
4674

4675
    This runs on master, primary and secondary nodes of the instance.
4676

4677
    """
4678
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4679
    env["TIMEOUT"] = self.op.timeout
4680
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4681
    return env, nl, nl
4682

    
4683
  def CheckPrereq(self):
4684
    """Check prerequisites.
4685

4686
    This checks that the instance is in the cluster.
4687

4688
    """
4689
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4690
    assert self.instance is not None, \
4691
      "Cannot retrieve locked instance %s" % self.op.instance_name
4692
    _CheckNodeOnline(self, self.instance.primary_node)
4693

    
4694
  def Exec(self, feedback_fn):
4695
    """Shutdown the instance.
4696

4697
    """
4698
    instance = self.instance
4699
    node_current = instance.primary_node
4700
    timeout = self.op.timeout
4701
    self.cfg.MarkInstanceDown(instance.name)
4702
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4703
    msg = result.fail_msg
4704
    if msg:
4705
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4706

    
4707
    _ShutdownInstanceDisks(self, instance)
4708

    
4709

    
4710
class LUReinstallInstance(LogicalUnit):
4711
  """Reinstall an instance.
4712

4713
  """
4714
  HPATH = "instance-reinstall"
4715
  HTYPE = constants.HTYPE_INSTANCE
4716
  _OP_PARAMS = [
4717
    _PInstanceName,
4718
    ("os_type", None, _TMaybeString),
4719
    ("force_variant", False, _TBool),
4720
    ]
4721
  REQ_BGL = False
4722

    
4723
  def ExpandNames(self):
4724
    self._ExpandAndLockInstance()
4725

    
4726
  def BuildHooksEnv(self):
4727
    """Build hooks env.
4728

4729
    This runs on master, primary and secondary nodes of the instance.
4730

4731
    """
4732
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4733
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4734
    return env, nl, nl
4735

    
4736
  def CheckPrereq(self):
4737
    """Check prerequisites.
4738

4739
    This checks that the instance is in the cluster and is not running.
4740

4741
    """
4742
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4743
    assert instance is not None, \
4744
      "Cannot retrieve locked instance %s" % self.op.instance_name
4745
    _CheckNodeOnline(self, instance.primary_node)
4746

    
4747
    if instance.disk_template == constants.DT_DISKLESS:
4748
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4749
                                 self.op.instance_name,
4750
                                 errors.ECODE_INVAL)
4751
    _CheckInstanceDown(self, instance, "cannot reinstall")
4752

    
4753
    if self.op.os_type is not None:
4754
      # OS verification
4755
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4756
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4757

    
4758
    self.instance = instance
4759

    
4760
  def Exec(self, feedback_fn):
4761
    """Reinstall the instance.
4762

4763
    """
4764
    inst = self.instance
4765

    
4766
    if self.op.os_type is not None:
4767
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4768
      inst.os = self.op.os_type
4769
      self.cfg.Update(inst, feedback_fn)
4770

    
4771
    _StartInstanceDisks(self, inst, None)
4772
    try:
4773
      feedback_fn("Running the instance OS create scripts...")
4774
      # FIXME: pass debug option from opcode to backend
4775
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4776
                                             self.op.debug_level)
4777
      result.Raise("Could not install OS for instance %s on node %s" %
4778
                   (inst.name, inst.primary_node))
4779
    finally:
4780
      _ShutdownInstanceDisks(self, inst)
4781

    
4782

    
4783
class LURecreateInstanceDisks(LogicalUnit):
4784
  """Recreate an instance's missing disks.
4785

4786
  """
4787
  HPATH = "instance-recreate-disks"
4788
  HTYPE = constants.HTYPE_INSTANCE
4789
  _OP_PARAMS = [
4790
    _PInstanceName,
4791
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4792
    ]
4793
  REQ_BGL = False
4794

    
4795
  def ExpandNames(self):
4796
    self._ExpandAndLockInstance()
4797

    
4798
  def BuildHooksEnv(self):
4799
    """Build hooks env.
4800

4801
    This runs on master, primary and secondary nodes of the instance.
4802

4803
    """
4804
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4805
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4806
    return env, nl, nl
4807

    
4808
  def CheckPrereq(self):
4809
    """Check prerequisites.
4810

4811
    This checks that the instance is in the cluster and is not running.
4812

4813
    """
4814
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4815
    assert instance is not None, \
4816
      "Cannot retrieve locked instance %s" % self.op.instance_name
4817
    _CheckNodeOnline(self, instance.primary_node)
4818

    
4819
    if instance.disk_template == constants.DT_DISKLESS:
4820
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4821
                                 self.op.instance_name, errors.ECODE_INVAL)
4822
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4823

    
4824
    if not self.op.disks:
4825
      self.op.disks = range(len(instance.disks))
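      # an empty disks list means "recreate all of the instance's disks"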
4826
    else:
4827
      for idx in self.op.disks:
4828
        if idx >= len(instance.disks):
4829
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4830
                                     errors.ECODE_INVAL)
4831

    
4832
    self.instance = instance
4833

    
4834
  def Exec(self, feedback_fn):
4835
    """Recreate the disks.
4836

4837
    """
4838
    to_skip = []
4839
    for idx, _ in enumerate(self.instance.disks):
4840
      if idx not in self.op.disks: # disk idx has not been passed in
4841
        to_skip.append(idx)
4842
        continue
4843

    
4844
    _CreateDisks(self, self.instance, to_skip=to_skip)
4845

    
4846

    
4847
class LURenameInstance(LogicalUnit):
4848
  """Rename an instance.
4849

4850
  """
4851
  HPATH = "instance-rename"
4852
  HTYPE = constants.HTYPE_INSTANCE
4853
  _OP_PARAMS = [
4854
    _PInstanceName,
4855
    ("new_name", _NoDefault, _TNonEmptyString),
4856
    ("ignore_ip", False, _TBool),
4857
    ("check_name", True, _TBool),
4858
    ]
4859

    
4860
  def BuildHooksEnv(self):
4861
    """Build hooks env.
4862

4863
    This runs on master, primary and secondary nodes of the instance.
4864

4865
    """
4866
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4867
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4868
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4869
    return env, nl, nl
4870

    
4871
  def CheckPrereq(self):
4872
    """Check prerequisites.
4873

4874
    This checks that the instance is in the cluster and is not running.
4875

4876
    """
4877
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4878
                                                self.op.instance_name)
4879
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4880
    assert instance is not None
4881
    _CheckNodeOnline(self, instance.primary_node)
4882
    _CheckInstanceDown(self, instance, "cannot rename")
4883
    self.instance = instance
4884

    
4885
    # new name verification
4886
    if self.op.check_name:
4887
      name_info = netutils.GetHostInfo(self.op.new_name)
4888
      self.op.new_name = name_info.name
4889

    
4890
    new_name = self.op.new_name
4891

    
4892
    instance_list = self.cfg.GetInstanceList()
4893
    if new_name in instance_list:
4894
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4895
                                 new_name, errors.ECODE_EXISTS)
4896

    
4897
    if not self.op.ignore_ip:
4898
      if netutils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4899
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4900
                                   (name_info.ip, new_name),
4901
                                   errors.ECODE_NOTUNIQUE)
4902

    
4903
  def Exec(self, feedback_fn):
4904
    """Reinstall the instance.
4905

4906
    """
4907
    inst = self.instance
4908
    old_name = inst.name
4909

    
4910
    if inst.disk_template == constants.DT_FILE:
4911
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4912

    
4913
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4914
    # Change the instance lock. This is definitely safe while we hold the BGL
4915
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4916
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4917

    
4918
    # re-read the instance from the configuration after rename
4919
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4920

    
4921
    if inst.disk_template == constants.DT_FILE:
4922
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4923
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4924
                                                     old_file_storage_dir,
4925
                                                     new_file_storage_dir)
4926
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4927
                   " (but the instance has been renamed in Ganeti)" %
4928
                   (inst.primary_node, old_file_storage_dir,
4929
                    new_file_storage_dir))
4930

    
4931
    _StartInstanceDisks(self, inst, None)
4932
    try:
4933
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4934
                                                 old_name, self.op.debug_level)
4935
      msg = result.fail_msg
4936
      if msg:
4937
        msg = ("Could not run OS rename script for instance %s on node %s"
4938
               " (but the instance has been renamed in Ganeti): %s" %
4939
               (inst.name, inst.primary_node, msg))
4940
        self.proc.LogWarning(msg)
4941
    finally:
4942
      _ShutdownInstanceDisks(self, inst)
4943

    
4944

    
4945
class LURemoveInstance(LogicalUnit):
4946
  """Remove an instance.
4947

4948
  """
4949
  HPATH = "instance-remove"
4950
  HTYPE = constants.HTYPE_INSTANCE
4951
  _OP_PARAMS = [
4952
    _PInstanceName,
4953
    ("ignore_failures", False, _TBool),
4954
    _PShutdownTimeout,
4955
    ]
4956
  REQ_BGL = False
4957

    
4958
  def ExpandNames(self):
4959
    self._ExpandAndLockInstance()
4960
    self.needed_locks[locking.LEVEL_NODE] = []
4961
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4962

    
4963
  def DeclareLocks(self, level):
4964
    if level == locking.LEVEL_NODE:
4965
      self._LockInstancesNodes()
4966

    
4967
  def BuildHooksEnv(self):
4968
    """Build hooks env.
4969

4970
    This runs on master, primary and secondary nodes of the instance.
4971

4972
    """
4973
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4974
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4975
    nl = [self.cfg.GetMasterNode()]
4976
    nl_post = list(self.instance.all_nodes) + nl
4977
    return env, nl, nl_post
4978

    
4979
  def CheckPrereq(self):
4980
    """Check prerequisites.
4981

4982
    This checks that the instance is in the cluster.
4983

4984
    """
4985
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4986
    assert self.instance is not None, \
4987
      "Cannot retrieve locked instance %s" % self.op.instance_name
4988

    
4989
  def Exec(self, feedback_fn):
4990
    """Remove the instance.
4991

4992
    """
4993
    instance = self.instance
4994
    logging.info("Shutting down instance %s on node %s",
4995
                 instance.name, instance.primary_node)
4996

    
4997
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4998
                                             self.op.shutdown_timeout)
4999
    msg = result.fail_msg
5000
    if msg:
5001
      if self.op.ignore_failures:
5002
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5003
      else:
5004
        raise errors.OpExecError("Could not shutdown instance %s on"
5005
                                 " node %s: %s" %
5006
                                 (instance.name, instance.primary_node, msg))
5007

    
5008
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5009

    
5010

    
5011
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5012
  """Utility function to remove an instance.
5013

5014
  """
5015
  logging.info("Removing block devices for instance %s", instance.name)
5016

    
5017
  if not _RemoveDisks(lu, instance):
5018
    if not ignore_failures:
5019
      raise errors.OpExecError("Can't remove instance's disks")
5020
    feedback_fn("Warning: can't remove instance's disks")
5021

    
5022
  logging.info("Removing instance %s out of cluster config", instance.name)
5023

    
5024
  lu.cfg.RemoveInstance(instance.name)
5025

    
5026
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5027
    "Instance lock removal conflict"
5028

    
5029
  # Remove lock for the instance
5030
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5031

    
5032

    
5033
class LUQueryInstances(NoHooksLU):
5034
  """Logical unit for querying instances.
5035

5036
  """
5037
  # pylint: disable-msg=W0142
5038
  _OP_PARAMS = [
5039
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5040
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5041
    ("use_locking", False, _TBool),
5042
    ]
5043
  REQ_BGL = False
5044
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5045
                    "serial_no", "ctime", "mtime", "uuid"]
5046
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5047
                                    "admin_state",
5048
                                    "disk_template", "ip", "mac", "bridge",
5049
                                    "nic_mode", "nic_link",
5050
                                    "sda_size", "sdb_size", "vcpus", "tags",
5051
                                    "network_port", "beparams",
5052
                                    r"(disk)\.(size)/([0-9]+)",
5053
                                    r"(disk)\.(sizes)", "disk_usage",
5054
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5055
                                    r"(nic)\.(bridge)/([0-9]+)",
5056
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5057
                                    r"(disk|nic)\.(count)",
5058
                                    "hvparams",
5059
                                    ] + _SIMPLE_FIELDS +
5060
                                  ["hv/%s" % name
5061
                                   for name in constants.HVS_PARAMETERS
5062
                                   if name not in constants.HVC_GLOBALS] +
5063
                                  ["be/%s" % name
5064
                                   for name in constants.BES_PARAMETERS])
5065
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5066
                                   "oper_ram",
5067
                                   "oper_vcpus",
5068
                                   "status")
5069

    
5070

    
5071
  def CheckArguments(self):
5072
    _CheckOutputFields(static=self._FIELDS_STATIC,
5073
                       dynamic=self._FIELDS_DYNAMIC,
5074
                       selected=self.op.output_fields)
5075

    
5076
  def ExpandNames(self):
5077
    self.needed_locks = {}
5078
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5079
    self.share_locks[locking.LEVEL_NODE] = 1
5080

    
5081
    if self.op.names:
5082
      self.wanted = _GetWantedInstances(self, self.op.names)
5083
    else:
5084
      self.wanted = locking.ALL_SET
5085

    
5086
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5087
    self.do_locking = self.do_node_query and self.op.use_locking
5088
    if self.do_locking:
5089
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5090
      self.needed_locks[locking.LEVEL_NODE] = []
5091
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5092

    
5093
  def DeclareLocks(self, level):
5094
    if level == locking.LEVEL_NODE and self.do_locking:
5095
      self._LockInstancesNodes()
5096

    
5097
  def Exec(self, feedback_fn):
5098
    """Computes the list of nodes and their attributes.
5099

5100
    """
5101
    # pylint: disable-msg=R0912
5102
    # way too many branches here
5103
    all_info = self.cfg.GetAllInstancesInfo()
5104
    if self.wanted == locking.ALL_SET:
5105
      # caller didn't specify instance names, so ordering is not important
5106
      if self.do_locking:
5107
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5108
      else:
5109
        instance_names = all_info.keys()
5110
      instance_names = utils.NiceSort(instance_names)
5111
    else:
5112
      # caller did specify names, so we must keep the ordering
5113
      if self.do_locking:
5114
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5115
      else:
5116
        tgt_set = all_info.keys()
5117
      missing = set(self.wanted).difference(tgt_set)
5118
      if missing:
5119
        raise errors.OpExecError("Some instances were removed before"
5120
                                 " retrieving their data: %s" % missing)
5121
      instance_names = self.wanted
5122

    
5123
    instance_list = [all_info[iname] for iname in instance_names]
5124

    
5125
    # begin data gathering
5126

    
5127
    nodes = frozenset([inst.primary_node for inst in instance_list])
5128
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5129

    
5130
    bad_nodes = []
5131
    off_nodes = []
5132
    if self.do_node_query:
5133
      live_data = {}
5134
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5135
      for name in nodes:
5136
        result = node_data[name]
5137
        if result.offline:
5138
          # offline nodes will be in both lists
5139
          off_nodes.append(name)
5140
        if result.fail_msg:
5141
          bad_nodes.append(name)
5142
        else:
5143
          if result.payload:
5144
            live_data.update(result.payload)
5145
          # else no instance is alive
5146
    else:
5147
      live_data = dict([(name, {}) for name in instance_names])
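      # static-only query: provide empty live data so the per-field code
      # below can fall back to configuration values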
5148

    
5149
    # end data gathering
5150

    
5151
    HVPREFIX = "hv/"
5152
    BEPREFIX = "be/"
5153
    output = []
5154
    cluster = self.cfg.GetClusterInfo()
5155
    for instance in instance_list:
5156
      iout = []
5157
      i_hv = cluster.FillHV(instance, skip_globals=True)
5158
      i_be = cluster.FillBE(instance)
5159
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5160
      for field in self.op.output_fields:
5161
        st_match = self._FIELDS_STATIC.Matches(field)
5162
        if field in self._SIMPLE_FIELDS:
5163
          val = getattr(instance, field)
5164
        elif field == "pnode":
5165
          val = instance.primary_node
5166
        elif field == "snodes":
5167
          val = list(instance.secondary_nodes)
5168
        elif field == "admin_state":
5169
          val = instance.admin_up
5170
        elif field == "oper_state":
5171
          if instance.primary_node in bad_nodes:
5172
            val = None
5173
          else:
5174
            val = bool(live_data.get(instance.name))
5175
        elif field == "status":
5176
          if instance.primary_node in off_nodes:
5177
            val = "ERROR_nodeoffline"
5178
          elif instance.primary_node in bad_nodes:
5179
            val = "ERROR_nodedown"
5180
          else:
5181
            running = bool(live_data.get(instance.name))
5182
            if running:
5183
              if instance.admin_up:
5184
                val = "running"
5185
              else:
5186
                val = "ERROR_up"
5187
            else:
5188
              if instance.admin_up:
5189
                val = "ERROR_down"
5190
              else:
5191
                val = "ADMIN_down"
5192
        elif field == "oper_ram":
5193
          if instance.primary_node in bad_nodes:
5194
            val = None
5195
          elif instance.name in live_data:
5196
            val = live_data[instance.name].get("memory", "?")
5197
          else:
5198
            val = "-"
5199
        elif field == "oper_vcpus":
5200
          if instance.primary_node in bad_nodes:
5201
            val = None
5202
          elif instance.name in live_data:
5203
            val = live_data[instance.name].get("vcpus", "?")
5204
          else:
5205
            val = "-"
5206
        elif field == "vcpus":
5207
          val = i_be[constants.BE_VCPUS]
5208
        elif field == "disk_template":
5209
          val = instance.disk_template
5210
        elif field == "ip":
5211
          if instance.nics:
5212
            val = instance.nics[0].ip
5213
          else:
5214
            val = None
5215
        elif field == "nic_mode":
5216
          if instance.nics:
5217
            val = i_nicp[0][constants.NIC_MODE]
5218
          else:
5219
            val = None
5220
        elif field == "nic_link":
5221
          if instance.nics:
5222
            val = i_nicp[0][constants.NIC_LINK]
5223
          else:
5224
            val = None
5225
        elif field == "bridge":
5226
          if (instance.nics and
5227
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5228
            val = i_nicp[0][constants.NIC_LINK]
5229
          else:
5230
            val = None
5231
        elif field == "mac":
5232
          if instance.nics:
5233
            val = instance.nics[0].mac
5234
          else:
5235
            val = None
5236
        elif field == "sda_size" or field == "sdb_size":
5237
          idx = ord(field[2]) - ord('a')
5238
          try:
5239
            val = instance.FindDisk(idx).size
5240
          except errors.OpPrereqError:
5241
            val = None
5242
        elif field == "disk_usage": # total disk usage per node
5243
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5244
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5245
        elif field == "tags":
5246
          val = list(instance.GetTags())
5247
        elif field == "hvparams":
5248
          val = i_hv
5249
        elif (field.startswith(HVPREFIX) and
5250
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5251
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5252
          val = i_hv.get(field[len(HVPREFIX):], None)
5253
        elif field == "beparams":
5254
          val = i_be
5255
        elif (field.startswith(BEPREFIX) and
5256
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5257
          val = i_be.get(field[len(BEPREFIX):], None)
5258
        elif st_match and st_match.groups():
5259
          # matches a variable list
5260
          st_groups = st_match.groups()
5261
          if st_groups and st_groups[0] == "disk":
5262
            if st_groups[1] == "count":
5263
              val = len(instance.disks)
5264
            elif st_groups[1] == "sizes":
5265
              val = [disk.size for disk in instance.disks]
5266
            elif st_groups[1] == "size":
5267
              try:
5268
                val = instance.FindDisk(st_groups[2]).size
5269
              except errors.OpPrereqError:
5270
                val = None
5271
            else:
5272
              assert False, "Unhandled disk parameter"
5273
          elif st_groups[0] == "nic":
5274
            if st_groups[1] == "count":
5275
              val = len(instance.nics)
5276
            elif st_groups[1] == "macs":
5277
              val = [nic.mac for nic in instance.nics]
5278
            elif st_groups[1] == "ips":
5279
              val = [nic.ip for nic in instance.nics]
5280
            elif st_groups[1] == "modes":
5281
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5282
            elif st_groups[1] == "links":
5283
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5284
            elif st_groups[1] == "bridges":
5285
              val = []
5286
              for nicp in i_nicp:
5287
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5288
                  val.append(nicp[constants.NIC_LINK])
5289
                else:
5290
                  val.append(None)
5291
            else:
5292
              # index-based item
5293
              nic_idx = int(st_groups[2])
5294
              if nic_idx >= len(instance.nics):
5295
                val = None
5296
              else:
5297
                if st_groups[1] == "mac":
5298
                  val = instance.nics[nic_idx].mac
5299
                elif st_groups[1] == "ip":
5300
                  val = instance.nics[nic_idx].ip
5301
                elif st_groups[1] == "mode":
5302
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5303
                elif st_groups[1] == "link":
5304
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5305
                elif st_groups[1] == "bridge":
5306
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5307
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5308
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5309
                  else:
5310
                    val = None
5311
                else:
5312
                  assert False, "Unhandled NIC parameter"
5313
          else:
5314
            assert False, ("Declared but unhandled variable parameter '%s'" %
5315
                           field)
5316
        else:
5317
          assert False, "Declared but unhandled parameter '%s'" % field
5318
        iout.append(val)
5319
      output.append(iout)
5320

    
5321
    return output
5322

    
5323

    
5324
class LUFailoverInstance(LogicalUnit):
5325
  """Failover an instance.
5326

5327
  """
5328
  HPATH = "instance-failover"
5329
  HTYPE = constants.HTYPE_INSTANCE
5330
  _OP_PARAMS = [
5331
    _PInstanceName,
5332
    ("ignore_consistency", False, _TBool),
5333
    _PShutdownTimeout,
5334
    ]
5335
  REQ_BGL = False
5336

    
5337
  def ExpandNames(self):
5338
    self._ExpandAndLockInstance()
5339
    self.needed_locks[locking.LEVEL_NODE] = []
5340
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5341

    
5342
  def DeclareLocks(self, level):
5343
    if level == locking.LEVEL_NODE:
5344
      self._LockInstancesNodes()
5345

    
5346
  def BuildHooksEnv(self):
5347
    """Build hooks env.
5348

5349
    This runs on master, primary and secondary nodes of the instance.
5350

5351
    """
5352
    instance = self.instance
5353
    source_node = instance.primary_node
5354
    target_node = instance.secondary_nodes[0]
5355
    env = {
5356
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5357
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5358
      "OLD_PRIMARY": source_node,
5359
      "OLD_SECONDARY": target_node,
5360
      "NEW_PRIMARY": target_node,
5361
      "NEW_SECONDARY": source_node,
5362
      }
5363
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5364
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5365
    nl_post = list(nl)
5366
    nl_post.append(source_node)
5367
    return env, nl, nl_post
5368

    
5369
  def CheckPrereq(self):
5370
    """Check prerequisites.
5371

5372
    This checks that the instance is in the cluster.
5373

5374
    """
5375
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5376
    assert self.instance is not None, \
5377
      "Cannot retrieve locked instance %s" % self.op.instance_name
5378

    
5379
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5380
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5381
      raise errors.OpPrereqError("Instance's disk layout is not"
5382
                                 " network mirrored, cannot failover.",
5383
                                 errors.ECODE_STATE)
5384

    
5385
    secondary_nodes = instance.secondary_nodes
5386
    if not secondary_nodes:
5387
      raise errors.ProgrammerError("no secondary node but using "
5388
                                   "a mirrored disk template")
5389

    
5390
    target_node = secondary_nodes[0]
5391
    _CheckNodeOnline(self, target_node)
5392
    _CheckNodeNotDrained(self, target_node)
5393
    if instance.admin_up:
5394
      # check memory requirements on the secondary node
5395
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5396
                           instance.name, bep[constants.BE_MEMORY],
5397
                           instance.hypervisor)
5398
    else:
5399
      self.LogInfo("Not checking memory on the secondary node as"
5400
                   " instance will not be started")
5401

    
5402
    # check bridge existance
5403
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5404

    
5405
  def Exec(self, feedback_fn):
5406
    """Failover an instance.
5407

5408
    The failover is done by shutting it down on its present node and
5409
    starting it on the secondary.
5410

5411
    """
5412
    instance = self.instance
5413

    
5414
    source_node = instance.primary_node
5415
    target_node = instance.secondary_nodes[0]
5416

    
5417
    if instance.admin_up:
5418
      feedback_fn("* checking disk consistency between source and target")
5419
      for dev in instance.disks:
5420
        # for drbd, these are drbd over lvm
5421
        if not _CheckDiskConsistency(self, dev, target_node, False):
5422
          if not self.op.ignore_consistency:
5423
            raise errors.OpExecError("Disk %s is degraded on target node,"
5424
                                     " aborting failover." % dev.iv_name)
5425
    else:
5426
      feedback_fn("* not checking disk consistency as instance is not running")
5427

    
5428
    feedback_fn("* shutting down instance on source node")
5429
    logging.info("Shutting down instance %s on node %s",
5430
                 instance.name, source_node)
5431

    
5432
    result = self.rpc.call_instance_shutdown(source_node, instance,
5433
                                             self.op.shutdown_timeout)
5434
    msg = result.fail_msg
5435
    if msg:
5436
      if self.op.ignore_consistency:
5437
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5438
                             " Proceeding anyway. Please make sure node"
5439
                             " %s is down. Error details: %s",
5440
                             instance.name, source_node, source_node, msg)
5441
      else:
5442
        raise errors.OpExecError("Could not shutdown instance %s on"
5443
                                 " node %s: %s" %
5444
                                 (instance.name, source_node, msg))
5445

    
5446
    feedback_fn("* deactivating the instance's disks on source node")
5447
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5448
      raise errors.OpExecError("Can't shut down the instance's disks.")
5449

    
5450
    instance.primary_node = target_node
5451
    # distribute new instance config to the other nodes
5452
    self.cfg.Update(instance, feedback_fn)
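    # from this point on the configuration considers the instance to live on
    # the target node; failures below leave it there, possibly not running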
5453

    
5454
    # Only start the instance if it's marked as up
5455
    if instance.admin_up:
5456
      feedback_fn("* activating the instance's disks on target node")
5457
      logging.info("Starting instance %s on node %s",
5458
                   instance.name, target_node)
5459

    
5460
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5461
                                           ignore_secondaries=True)
5462
      if not disks_ok:
5463
        _ShutdownInstanceDisks(self, instance)
5464
        raise errors.OpExecError("Can't activate the instance's disks")
5465

    
5466
      feedback_fn("* starting the instance on the target node")
5467
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5468
      msg = result.fail_msg
5469
      if msg:
5470
        _ShutdownInstanceDisks(self, instance)
5471
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5472
                                 (instance.name, target_node, msg))
5473

    
5474

    
5475
class LUMigrateInstance(LogicalUnit):
5476
  """Migrate an instance.
5477

5478
  This is migration without shutting down, compared to the failover,
5479
  which is done with shutdown.
5480

5481
  """
5482
  HPATH = "instance-migrate"
5483
  HTYPE = constants.HTYPE_INSTANCE
5484
  _OP_PARAMS = [
5485
    _PInstanceName,
5486
    ("live", True, _TBool),
5487
    ("cleanup", False, _TBool),
5488
    ]
5489

    
5490
  REQ_BGL = False
5491

    
5492
  def ExpandNames(self):
5493
    self._ExpandAndLockInstance()
5494

    
5495
    self.needed_locks[locking.LEVEL_NODE] = []
5496
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5497

    
5498
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5499
                                       self.op.live, self.op.cleanup)
5500
    self.tasklets = [self._migrater]
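    # the actual migration work is delegated to the tasklet; this LU only
    # handles locking and the hooks environment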
5501

    
5502
  def DeclareLocks(self, level):
5503
    if level == locking.LEVEL_NODE:
5504
      self._LockInstancesNodes()
5505

    
5506
  def BuildHooksEnv(self):
5507
    """Build hooks env.
5508

5509
    This runs on master, primary and secondary nodes of the instance.
5510

5511
    """
5512
    instance = self._migrater.instance
5513
    source_node = instance.primary_node
5514
    target_node = instance.secondary_nodes[0]
5515
    env = _BuildInstanceHookEnvByObject(self, instance)
5516
    env["MIGRATE_LIVE"] = self.op.live
5517
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5518
    env.update({
5519
        "OLD_PRIMARY": source_node,
5520
        "OLD_SECONDARY": target_node,
5521
        "NEW_PRIMARY": target_node,
5522
        "NEW_SECONDARY": source_node,
5523
        })
5524
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5525
    nl_post = list(nl)
5526
    nl_post.append(source_node)
5527
    return env, nl, nl_post
5528

    
5529

    
5530
class LUMoveInstance(LogicalUnit):
5531
  """Move an instance by data-copying.
5532

5533
  """
5534
  HPATH = "instance-move"
5535
  HTYPE = constants.HTYPE_INSTANCE
5536
  _OP_PARAMS = [
5537
    _PInstanceName,
5538
    ("target_node", _NoDefault, _TNonEmptyString),
5539
    _PShutdownTimeout,
5540
    ]
5541
  REQ_BGL = False
5542

    
5543
  def ExpandNames(self):
5544
    self._ExpandAndLockInstance()
5545
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5546
    self.op.target_node = target_node
5547
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5548
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5549

    
5550
  def DeclareLocks(self, level):
5551
    if level == locking.LEVEL_NODE:
5552
      self._LockInstancesNodes(primary_only=True)
5553

    
5554
  def BuildHooksEnv(self):
5555
    """Build hooks env.
5556

5557
    This runs on master, primary and secondary nodes of the instance.
5558

5559
    """
5560
    env = {
5561
      "TARGET_NODE": self.op.target_node,
5562
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5563
      }
5564
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5565
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5566
                                       self.op.target_node]
5567
    return env, nl, nl
5568

    
5569
  def CheckPrereq(self):
5570
    """Check prerequisites.
5571

5572
    This checks that the instance is in the cluster.
5573

5574
    """
5575
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5576
    assert self.instance is not None, \
5577
      "Cannot retrieve locked instance %s" % self.op.instance_name
5578

    
5579
    node = self.cfg.GetNodeInfo(self.op.target_node)
5580
    assert node is not None, \
5581
      "Cannot retrieve locked node %s" % self.op.target_node
5582

    
5583
    self.target_node = target_node = node.name
5584

    
5585
    if target_node == instance.primary_node:
5586
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5587
                                 (instance.name, target_node),
5588
                                 errors.ECODE_STATE)
5589

    
5590
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5591

    
5592
    for idx, dsk in enumerate(instance.disks):
5593
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5594
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5595
                                   " cannot copy" % idx, errors.ECODE_STATE)
5596

    
5597
    _CheckNodeOnline(self, target_node)
5598
    _CheckNodeNotDrained(self, target_node)
5599

    
5600
    if instance.admin_up:
5601
      # check memory requirements on the secondary node
5602
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5603
                           instance.name, bep[constants.BE_MEMORY],
5604
                           instance.hypervisor)
5605
    else:
5606
      self.LogInfo("Not checking memory on the secondary node as"
5607
                   " instance will not be started")
5608

    
5609
    # check bridge existence
5610
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5611

    
5612
  def Exec(self, feedback_fn):
5613
    """Move an instance.
5614

5615
    The move is done by shutting it down on its present node, copying
5616
    the data over (slow) and starting it on the new node.
5617

5618
    """
5619
    instance = self.instance
5620

    
5621
    source_node = instance.primary_node
5622
    target_node = self.target_node
5623

    
5624
    self.LogInfo("Shutting down instance %s on source node %s",
5625
                 instance.name, source_node)
5626

    
5627
    result = self.rpc.call_instance_shutdown(source_node, instance,
5628
                                             self.op.shutdown_timeout)
5629
    msg = result.fail_msg
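    # note: unlike LUFailoverInstance, this LU does not declare
    # "ignore_consistency" in its _OP_PARAMS, so the attribute tested below
    # may not be present on the opcode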
5630
    if msg:
5631
      if self.op.ignore_consistency:
5632
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5633
                             " Proceeding anyway. Please make sure node"
5634
                             " %s is down. Error details: %s",
5635
                             instance.name, source_node, source_node, msg)
5636
      else:
5637
        raise errors.OpExecError("Could not shutdown instance %s on"
5638
                                 " node %s: %s" %
5639
                                 (instance.name, source_node, msg))
5640

    
5641
    # create the target disks
5642
    try:
5643
      _CreateDisks(self, instance, target_node=target_node)
5644
    except errors.OpExecError:
5645
      self.LogWarning("Device creation failed, reverting...")
5646
      try:
5647
        _RemoveDisks(self, instance, target_node=target_node)
5648
      finally:
5649
        self.cfg.ReleaseDRBDMinors(instance.name)
5650
        raise
5651

    
5652
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5653

    
5654
    errs = []
5655
    # activate, get path, copy the data over
5656
    for idx, disk in enumerate(instance.disks):
5657
      self.LogInfo("Copying data for disk %d", idx)
5658
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5659
                                               instance.name, True)
5660
      if result.fail_msg:
5661
        self.LogWarning("Can't assemble newly created disk %d: %s",
5662
                        idx, result.fail_msg)
5663
        errs.append(result.fail_msg)
5664
        break
5665
      dev_path = result.payload
5666
      result = self.rpc.call_blockdev_export(source_node, disk,
5667
                                             target_node, dev_path,
5668
                                             cluster_name)
5669
      if result.fail_msg:
5670
        self.LogWarning("Can't copy data over for disk %d: %s",
5671
                        idx, result.fail_msg)
5672
        errs.append(result.fail_msg)
5673
        break
5674

    
5675
    if errs:
5676
      self.LogWarning("Some disks failed to copy, aborting")
5677
      try:
5678
        _RemoveDisks(self, instance, target_node=target_node)
5679
      finally:
5680
        self.cfg.ReleaseDRBDMinors(instance.name)
5681
        raise errors.OpExecError("Errors during disk copy: %s" %
5682
                                 (",".join(errs),))
5683

    
5684
    instance.primary_node = target_node
5685
    self.cfg.Update(instance, feedback_fn)
5686

    
5687
    self.LogInfo("Removing the disks on the original node")
5688
    _RemoveDisks(self, instance, target_node=source_node)
5689

    
5690
    # Only start the instance if it's marked as up
5691
    if instance.admin_up:
5692
      self.LogInfo("Starting instance %s on node %s",
5693
                   instance.name, target_node)
5694

    
5695
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5696
                                           ignore_secondaries=True)
5697
      if not disks_ok:
5698
        _ShutdownInstanceDisks(self, instance)
5699
        raise errors.OpExecError("Can't activate the instance's disks")
5700

    
5701
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5702
      msg = result.fail_msg
5703
      if msg:
5704
        _ShutdownInstanceDisks(self, instance)
5705
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5706
                                 (instance.name, target_node, msg))
5707

    
5708

    
5709
class LUMigrateNode(LogicalUnit):
5710
  """Migrate all instances from a node.
5711

5712
  """
5713
  HPATH = "node-migrate"
5714
  HTYPE = constants.HTYPE_NODE
5715
  _OP_PARAMS = [
5716
    _PNodeName,
5717
    ("live", False, _TBool),
5718
    ]
5719
  REQ_BGL = False
5720

    
5721
  def ExpandNames(self):
5722
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5723

    
5724
    self.needed_locks = {
5725
      locking.LEVEL_NODE: [self.op.node_name],
5726
      }
5727

    
5728
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5729

    
5730
    # Create tasklets for migrating instances for all instances on this node
5731
    names = []
5732
    tasklets = []
5733

    
5734
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5735
      logging.debug("Migrating instance %s", inst.name)
5736
      names.append(inst.name)
5737

    
5738
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
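      # The last argument (cleanup=False) means a regular migration is
      # performed, not the recovery of a previously failed one.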
5739

    
5740
    self.tasklets = tasklets
5741

    
5742
    # Declare instance locks
5743
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5744

    
5745
  def DeclareLocks(self, level):
5746
    if level == locking.LEVEL_NODE:
5747
      self._LockInstancesNodes()
5748

    
5749
  def BuildHooksEnv(self):
5750
    """Build hooks env.
5751

5752
    This runs on the master, the primary and all the secondaries.
5753

5754
    """
5755
    env = {
5756
      "NODE_NAME": self.op.node_name,
5757
      }
5758

    
5759
    nl = [self.cfg.GetMasterNode()]
5760

    
5761
    return (env, nl, nl)
5762

    
5763

    
5764
class TLMigrateInstance(Tasklet):
5765
  def __init__(self, lu, instance_name, live, cleanup):
5766
    """Initializes this class.
5767

5768
    """
5769
    Tasklet.__init__(self, lu)
5770

    
5771
    # Parameters
5772
    self.instance_name = instance_name
5773
    self.live = live
5774
    self.cleanup = cleanup
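    # When cleanup is True, Exec() calls _ExecCleanup() to recover from a
    # previously failed migration instead of starting a new one.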
5775

    
5776
  def CheckPrereq(self):
5777
    """Check prerequisites.
5778

5779
    This checks that the instance is in the cluster.
5780

5781
    """
5782
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5783
    instance = self.cfg.GetInstanceInfo(instance_name)
5784
    assert instance is not None
5785

    
5786
    if instance.disk_template != constants.DT_DRBD8:
5787
      raise errors.OpPrereqError("Instance's disk layout is not"
5788
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5789

    
5790
    secondary_nodes = instance.secondary_nodes
5791
    if not secondary_nodes:
5792
      raise errors.ConfigurationError("No secondary node but using"
5793
                                      " drbd8 disk template")
5794

    
5795
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5796

    
5797
    target_node = secondary_nodes[0]
5798
    # check memory requirements on the secondary node
5799
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5800
                         instance.name, i_be[constants.BE_MEMORY],
5801
                         instance.hypervisor)
5802

    
5803
    # check bridge existence
5804
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5805

    
5806
    if not self.cleanup:
5807
      _CheckNodeNotDrained(self.lu, target_node)
5808
      result = self.rpc.call_instance_migratable(instance.primary_node,
5809
                                                 instance)
5810
      result.Raise("Can't migrate, please use failover",
5811
                   prereq=True, ecode=errors.ECODE_STATE)
5812

    
5813
    self.instance = instance
5814

    
5815
  def _WaitUntilSync(self):
5816
    """Poll with custom rpc for disk sync.
5817

5818
    This uses our own step-based rpc call.
5819

5820
    """
5821
    self.feedback_fn("* wait until resync is done")
5822
    all_done = False
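    # Poll every two seconds until every node reports the resync as done,
    # showing the slowest node's progress while waiting.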
5823
    while not all_done:
5824
      all_done = True
5825
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5826
                                            self.nodes_ip,
5827
                                            self.instance.disks)
5828
      min_percent = 100
5829
      for node, nres in result.items():
5830
        nres.Raise("Cannot resync disks on node %s" % node)
5831
        node_done, node_percent = nres.payload
5832
        all_done = all_done and node_done
5833
        if node_percent is not None:
5834
          min_percent = min(min_percent, node_percent)
5835
      if not all_done:
5836
        if min_percent < 100:
5837
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5838
        time.sleep(2)
5839

    
5840
  def _EnsureSecondary(self, node):
5841
    """Demote a node to secondary.
5842

5843
    """
5844
    self.feedback_fn("* switching node %s to secondary mode" % node)
5845

    
5846
    for dev in self.instance.disks:
5847
      self.cfg.SetDiskID(dev, node)
5848

    
5849
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5850
                                          self.instance.disks)
5851
    result.Raise("Cannot change disk to secondary on node %s" % node)
5852

    
5853
  def _GoStandalone(self):
5854
    """Disconnect from the network.
5855

5856
    """
5857
    self.feedback_fn("* changing into standalone mode")
5858
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5859
                                               self.instance.disks)
5860
    for node, nres in result.items():
5861
      nres.Raise("Cannot disconnect disks node %s" % node)
5862

    
5863
  def _GoReconnect(self, multimaster):
5864
    """Reconnect to the network.
5865

5866
    """
5867
    if multimaster:
5868
      msg = "dual-master"
5869
    else:
5870
      msg = "single-master"
5871
    self.feedback_fn("* changing disks into %s mode" % msg)
5872
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5873
                                           self.instance.disks,
5874
                                           self.instance.name, multimaster)
5875
    for node, nres in result.items():
5876
      nres.Raise("Cannot change disks config on node %s" % node)
5877

    
5878
  def _ExecCleanup(self):
5879
    """Try to cleanup after a failed migration.
5880

5881
    The cleanup is done by:
5882
      - check that the instance is running only on one node
5883
        (and update the config if needed)
5884
      - change disks on its secondary node to secondary
5885
      - wait until disks are fully synchronized
5886
      - disconnect from the network
5887
      - change disks into single-master mode
5888
      - wait again until disks are fully synchronized
5889

5890
    """
5891
    instance = self.instance
5892
    target_node = self.target_node
5893
    source_node = self.source_node
5894

    
5895
    # check running on only one node
5896
    self.feedback_fn("* checking where the instance actually runs"
5897
                     " (if this hangs, the hypervisor might be in"
5898
                     " a bad state)")
5899
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5900
    for node, result in ins_l.items():
5901
      result.Raise("Can't contact node %s" % node)
5902

    
5903
    runningon_source = instance.name in ins_l[source_node].payload
5904
    runningon_target = instance.name in ins_l[target_node].payload
5905

    
5906
    if runningon_source and runningon_target:
5907
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5908
                               " or the hypervisor is confused. You will have"
5909
                               " to ensure manually that it runs only on one"
5910
                               " and restart this operation.")
5911

    
5912
    if not (runningon_source or runningon_target):
5913
      raise errors.OpExecError("Instance does not seem to be running at all."
5914
                               " In this case, it's safer to repair by"
5915
                               " running 'gnt-instance stop' to ensure disk"
5916
                               " shutdown, and then restarting it.")
5917

    
5918
    if runningon_target:
5919
      # the migration has actually succeeded, we need to update the config
5920
      self.feedback_fn("* instance running on secondary node (%s),"
5921
                       " updating config" % target_node)
5922
      instance.primary_node = target_node
5923
      self.cfg.Update(instance, self.feedback_fn)
5924
      demoted_node = source_node
5925
    else:
5926
      self.feedback_fn("* instance confirmed to be running on its"
5927
                       " primary node (%s)" % source_node)
5928
      demoted_node = target_node
5929

    
5930
    self._EnsureSecondary(demoted_node)
5931
    try:
5932
      self._WaitUntilSync()
5933
    except errors.OpExecError:
5934
      # we ignore here errors, since if the device is standalone, it
5935
      # won't be able to sync
5936
      pass
5937
    self._GoStandalone()
5938
    self._GoReconnect(False)
5939
    self._WaitUntilSync()
5940

    
5941
    self.feedback_fn("* done")
5942

    
5943
  def _RevertDiskStatus(self):
5944
    """Try to revert the disk status after a failed migration.
5945

5946
    """
5947
    target_node = self.target_node
5948
    try:
5949
      self._EnsureSecondary(target_node)
5950
      self._GoStandalone()
5951
      self._GoReconnect(False)
5952
      self._WaitUntilSync()
5953
    except errors.OpExecError, err:
5954
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5955
                         " drives: error '%s'\n"
5956
                         "Please look and recover the instance status" %
5957
                         str(err))
5958

    
5959
  def _AbortMigration(self):
5960
    """Call the hypervisor code to abort a started migration.
5961

5962
    """
5963
    instance = self.instance
5964
    target_node = self.target_node
5965
    migration_info = self.migration_info
5966

    
5967
    abort_result = self.rpc.call_finalize_migration(target_node,
5968
                                                    instance,
5969
                                                    migration_info,
5970
                                                    False)
5971
    abort_msg = abort_result.fail_msg
5972
    if abort_msg:
5973
      logging.error("Aborting migration failed on target node %s: %s",
5974
                    target_node, abort_msg)
5975
      # Don't raise an exception here, as we still have to try to revert the
5976
      # disk status, even if this step failed.
5977

    
5978
  def _ExecMigration(self):
5979
    """Migrate an instance.
5980

5981
    The migrate is done by:
5982
      - change the disks into dual-master mode
5983
      - wait until disks are fully synchronized again
5984
      - migrate the instance
5985
      - change disks on the new secondary node (the old primary) to secondary
5986
      - wait until disks are fully synchronized
5987
      - change disks into single-master mode
5988

5989
    """
5990
    instance = self.instance
5991
    target_node = self.target_node
5992
    source_node = self.source_node
5993

    
5994
    self.feedback_fn("* checking disk consistency between source and target")
5995
    for dev in instance.disks:
5996
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5997
        raise errors.OpExecError("Disk %s is degraded or not fully"
5998
                                 " synchronized on target node,"
5999
                                 " aborting migrate." % dev.iv_name)
6000

    
6001
    # First get the migration information from the remote node
6002
    result = self.rpc.call_migration_info(source_node, instance)
6003
    msg = result.fail_msg
6004
    if msg:
6005
      log_err = ("Failed fetching source migration information from %s: %s" %
6006
                 (source_node, msg))
6007
      logging.error(log_err)
6008
      raise errors.OpExecError(log_err)
6009

    
6010
    self.migration_info = migration_info = result.payload
6011

    
6012
    # Then switch the disks to master/master mode
6013
    self._EnsureSecondary(target_node)
6014
    self._GoStandalone()
6015
    self._GoReconnect(True)
6016
    self._WaitUntilSync()
6017

    
6018
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6019
    result = self.rpc.call_accept_instance(target_node,
6020
                                           instance,
6021
                                           migration_info,
6022
                                           self.nodes_ip[target_node])
6023

    
6024
    msg = result.fail_msg
6025
    if msg:
6026
      logging.error("Instance pre-migration failed, trying to revert"
6027
                    " disk status: %s", msg)
6028
      self.feedback_fn("Pre-migration failed, aborting")
6029
      self._AbortMigration()
6030
      self._RevertDiskStatus()
6031
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6032
                               (instance.name, msg))
6033

    
6034
    self.feedback_fn("* migrating instance to %s" % target_node)
6035
    time.sleep(10)
6036
    result = self.rpc.call_instance_migrate(source_node, instance,
6037
                                            self.nodes_ip[target_node],
6038
                                            self.live)
6039
    msg = result.fail_msg
6040
    if msg:
6041
      logging.error("Instance migration failed, trying to revert"
6042
                    " disk status: %s", msg)
6043
      self.feedback_fn("Migration failed, aborting")
6044
      self._AbortMigration()
6045
      self._RevertDiskStatus()
6046
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6047
                               (instance.name, msg))
6048
    time.sleep(10)
6049

    
6050
    instance.primary_node = target_node
6051
    # distribute new instance config to the other nodes
6052
    self.cfg.Update(instance, self.feedback_fn)
6053

    
6054
    result = self.rpc.call_finalize_migration(target_node,
6055
                                              instance,
6056
                                              migration_info,
6057
                                              True)
6058
    msg = result.fail_msg
6059
    if msg:
6060
      logging.error("Instance migration succeeded, but finalization failed:"
6061
                    " %s", msg)
6062
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6063
                               msg)
6064

    
6065
    self._EnsureSecondary(source_node)
6066
    self._WaitUntilSync()
6067
    self._GoStandalone()
6068
    self._GoReconnect(False)
6069
    self._WaitUntilSync()
6070

    
6071
    self.feedback_fn("* done")
6072

    
6073
  def Exec(self, feedback_fn):
6074
    """Perform the migration.
6075

6076
    """
6077
    feedback_fn("Migrating instance %s" % self.instance.name)
6078

    
6079
    self.feedback_fn = feedback_fn
6080

    
6081
    self.source_node = self.instance.primary_node
6082
    self.target_node = self.instance.secondary_nodes[0]
6083
    self.all_nodes = [self.source_node, self.target_node]
6084
    self.nodes_ip = {
6085
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6086
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6087
      }
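    # The secondary IPs are used both for the DRBD network operations
    # (_GoStandalone/_GoReconnect) and as the migration target address.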
6088

    
6089
    if self.cleanup:
6090
      return self._ExecCleanup()
6091
    else:
6092
      return self._ExecMigration()
6093

    
6094

    
6095
def _CreateBlockDev(lu, node, instance, device, force_create,
6096
                    info, force_open):
6097
  """Create a tree of block devices on a given node.
6098

6099
  If this device type has to be created on secondaries, create it and
6100
  all its children.
6101

6102
  If not, just recurse to children keeping the same 'force' value.
6103

6104
  @param lu: the lu on whose behalf we execute
6105
  @param node: the node on which to create the device
6106
  @type instance: L{objects.Instance}
6107
  @param instance: the instance which owns the device
6108
  @type device: L{objects.Disk}
6109
  @param device: the device to create
6110
  @type force_create: boolean
6111
  @param force_create: whether to force creation of this device; this
6112
      will be changed to True whenever we find a device which has
6113
      the CreateOnSecondary() attribute
6114
  @param info: the extra 'metadata' we should attach to the device
6115
      (this will be represented as a LVM tag)
6116
  @type force_open: boolean
6117
  @param force_open: this parameter will be passed to the
6118
      L{backend.BlockdevCreate} function where it specifies
6119
      whether we run on primary or not, and it affects both
6120
      the child assembly and the device's own Open() execution
6121

6122
  """
6123
  if device.CreateOnSecondary():
6124
    force_create = True
6125

    
6126
  if device.children:
6127
    for child in device.children:
6128
      _CreateBlockDev(lu, node, instance, child, force_create,
6129
                      info, force_open)
6130

    
6131
  if not force_create:
6132
    return
6133

    
6134
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
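  # Children are created before their parent, so e.g. a DRBD8 device finds
  # its data and metadata volumes already in place when it is set up.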
6135

    
6136

    
6137
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6138
  """Create a single block device on a given node.
6139

6140
  This will not recurse over children of the device, so they must be
6141
  created in advance.
6142

6143
  @param lu: the lu on whose behalf we execute
6144
  @param node: the node on which to create the device
6145
  @type instance: L{objects.Instance}
6146
  @param instance: the instance which owns the device
6147
  @type device: L{objects.Disk}
6148
  @param device: the device to create
6149
  @param info: the extra 'metadata' we should attach to the device
6150
      (this will be represented as a LVM tag)
6151
  @type force_open: boolean
6152
  @param force_open: this parameter will be passed to the
6153
      L{backend.BlockdevCreate} function where it specifies
6154
      whether we run on primary or not, and it affects both
6155
      the child assembly and the device's own Open() execution
6156

6157
  """
6158
  lu.cfg.SetDiskID(device, node)
6159
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6160
                                       instance.name, force_open, info)
6161
  result.Raise("Can't create block device %s on"
6162
               " node %s for instance %s" % (device, node, instance.name))
6163
  if device.physical_id is None:
6164
    device.physical_id = result.payload
6165

    
6166

    
6167
def _GenerateUniqueNames(lu, exts):
6168
  """Generate a suitable LV name.
6169

6170
  This will generate a logical volume name for the given instance.
6171

6172
  """
6173
  results = []
6174
  for val in exts:
6175
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6176
    results.append("%s%s" % (new_id, val))
6177
  return results
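  # Illustrative example: _GenerateUniqueNames(lu, [".disk0"]) returns
  # something like ["<unique-id>.disk0"], where <unique-id> is the value
  # produced by GenerateUniqueID().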
6178

    
6179

    
6180
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6181
                         p_minor, s_minor):
6182
  """Generate a drbd8 device complete with its children.
6183

6184
  """
6185
  port = lu.cfg.AllocatePort()
6186
  vgname = lu.cfg.GetVGName()
6187
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6188
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6189
                          logical_id=(vgname, names[0]))
6190
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6191
                          logical_id=(vgname, names[1]))
6192
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6193
                          logical_id=(primary, secondary, port,
6194
                                      p_minor, s_minor,
6195
                                      shared_secret),
6196
                          children=[dev_data, dev_meta],
6197
                          iv_name=iv_name)
6198
  return drbd_dev
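  # The returned disk tree is one DRBD8 device backed by two LVs per node:
  # names[0] carries the instance data, names[1] is a fixed 128 MiB metadata
  # volume; (port, p_minor, s_minor, shared_secret) identify the DRBD link.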
6199

    
6200

    
6201
def _GenerateDiskTemplate(lu, template_name,
6202
                          instance_name, primary_node,
6203
                          secondary_nodes, disk_info,
6204
                          file_storage_dir, file_driver,
6205
                          base_index):
6206
  """Generate the entire disk layout for a given template type.
6207

6208
  """
6209
  #TODO: compute space requirements
6210

    
6211
  vgname = lu.cfg.GetVGName()
6212
  disk_count = len(disk_info)
6213
  disks = []
6214
  if template_name == constants.DT_DISKLESS:
6215
    pass
6216
  elif template_name == constants.DT_PLAIN:
6217
    if len(secondary_nodes) != 0:
6218
      raise errors.ProgrammerError("Wrong template configuration")
6219

    
6220
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6221
                                      for i in range(disk_count)])
6222
    for idx, disk in enumerate(disk_info):
6223
      disk_index = idx + base_index
6224
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6225
                              logical_id=(vgname, names[idx]),
6226
                              iv_name="disk/%d" % disk_index,
6227
                              mode=disk["mode"])
6228
      disks.append(disk_dev)
6229
  elif template_name == constants.DT_DRBD8:
6230
    if len(secondary_nodes) != 1:
6231
      raise errors.ProgrammerError("Wrong template configuration")
6232
    remote_node = secondary_nodes[0]
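    # Allocate one (primary, secondary) DRBD minor pair per disk; the flat
    # list is consumed two entries at a time in the loop below.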
6233
    minors = lu.cfg.AllocateDRBDMinor(
6234
      [primary_node, remote_node] * len(disk_info), instance_name)
6235

    
6236
    names = []
6237
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6238
                                               for i in range(disk_count)]):
6239
      names.append(lv_prefix + "_data")
6240
      names.append(lv_prefix + "_meta")
6241
    for idx, disk in enumerate(disk_info):
6242
      disk_index = idx + base_index
6243
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6244
                                      disk["size"], names[idx*2:idx*2+2],
6245
                                      "disk/%d" % disk_index,
6246
                                      minors[idx*2], minors[idx*2+1])
6247
      disk_dev.mode = disk["mode"]
6248
      disks.append(disk_dev)
6249
  elif template_name == constants.DT_FILE:
6250
    if len(secondary_nodes) != 0:
6251
      raise errors.ProgrammerError("Wrong template configuration")
6252

    
6253
    _RequireFileStorage()
6254

    
6255
    for idx, disk in enumerate(disk_info):
6256
      disk_index = idx + base_index
6257
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6258
                              iv_name="disk/%d" % disk_index,
6259
                              logical_id=(file_driver,
6260
                                          "%s/disk%d" % (file_storage_dir,
6261
                                                         disk_index)),
6262
                              mode=disk["mode"])
6263
      disks.append(disk_dev)
6264
  else:
6265
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6266
  return disks
6267

    
6268

    
6269
def _GetInstanceInfoText(instance):
6270
  """Compute that text that should be added to the disk's metadata.
6271

6272
  """
6273
  return "originstname+%s" % instance.name
6274

    
6275

    
6276
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6277
  """Create all disks for an instance.
6278

6279
  This abstracts away some work from AddInstance.
6280

6281
  @type lu: L{LogicalUnit}
6282
  @param lu: the logical unit on whose behalf we execute
6283
  @type instance: L{objects.Instance}
6284
  @param instance: the instance whose disks we should create
6285
  @type to_skip: list
6286
  @param to_skip: list of indices to skip
6287
  @type target_node: string
6288
  @param target_node: if passed, overrides the target node for creation
6289
  @rtype: boolean
6290
  @return: the success of the creation
6291

6292
  """
6293
  info = _GetInstanceInfoText(instance)
6294
  if target_node is None:
6295
    pnode = instance.primary_node
6296
    all_nodes = instance.all_nodes
6297
  else:
6298
    pnode = target_node
6299
    all_nodes = [pnode]
6300

    
6301
  if instance.disk_template == constants.DT_FILE:
6302
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6303
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6304

    
6305
    result.Raise("Failed to create directory '%s' on"
6306
                 " node %s" % (file_storage_dir, pnode))
6307

    
6308
  # Note: this needs to be kept in sync with adding of disks in
6309
  # LUSetInstanceParams
6310
  for idx, device in enumerate(instance.disks):
6311
    if to_skip and idx in to_skip:
6312
      continue
6313
    logging.info("Creating volume %s for instance %s",
6314
                 device.iv_name, instance.name)
6315
    #HARDCODE
6316
    for node in all_nodes:
6317
      f_create = node == pnode
6318
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
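      # f_create is used for both force_create and force_open: only on the
      # primary node are the devices unconditionally created and opened.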
6319

    
6320

    
6321
def _RemoveDisks(lu, instance, target_node=None):
6322
  """Remove all disks for an instance.
6323

6324
  This abstracts away some work from `AddInstance()` and
6325
  `RemoveInstance()`. Note that in case some of the devices couldn't
6326
  be removed, the removal will continue with the other ones (compare
6327
  with `_CreateDisks()`).
6328

6329
  @type lu: L{LogicalUnit}
6330
  @param lu: the logical unit on whose behalf we execute
6331
  @type instance: L{objects.Instance}
6332
  @param instance: the instance whose disks we should remove
6333
  @type target_node: string
6334
  @param target_node: used to override the node on which to remove the disks
6335
  @rtype: boolean
6336
  @return: the success of the removal
6337

6338
  """
6339
  logging.info("Removing block devices for instance %s", instance.name)
6340

    
6341
  all_result = True
6342
  for device in instance.disks:
6343
    if target_node:
6344
      edata = [(target_node, device)]
6345
    else:
6346
      edata = device.ComputeNodeTree(instance.primary_node)
6347
    for node, disk in edata:
6348
      lu.cfg.SetDiskID(disk, node)
6349
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6350
      if msg:
6351
        lu.LogWarning("Could not remove block device %s on node %s,"
6352
                      " continuing anyway: %s", device.iv_name, node, msg)
6353
        all_result = False
6354

    
6355
  if instance.disk_template == constants.DT_FILE:
6356
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6357
    if target_node:
6358
      tgt = target_node
6359
    else:
6360
      tgt = instance.primary_node
6361
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6362
    if result.fail_msg:
6363
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6364
                    file_storage_dir, instance.primary_node, result.fail_msg)
6365
      all_result = False
6366

    
6367
  return all_result
6368

    
6369

    
6370
def _ComputeDiskSize(disk_template, disks):
6371
  """Compute disk size requirements in the volume group
6372

6373
  """
6374
  # Required free disk space as a function of the disk template and sizes
6375
  req_size_dict = {
6376
    constants.DT_DISKLESS: None,
6377
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6378
    # 128 MB are added for drbd metadata for each disk
6379
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6380
    constants.DT_FILE: None,
6381
  }
6382

    
6383
  if disk_template not in req_size_dict:
6384
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6385
                                 " is unknown" %  disk_template)
6386

    
6387
  return req_size_dict[disk_template]
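  # Worked example (sizes in MiB): two DRBD8 disks of 1024 and 2048 need
  # (1024 + 128) + (2048 + 128) = 3328 in the volume group, while diskless
  # and file-based instances need none.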
6388

    
6389

    
6390
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6391
  """Hypervisor parameter validation.
6392

6393
  This function abstract the hypervisor parameter validation to be
6394
  used in both instance create and instance modify.
6395

6396
  @type lu: L{LogicalUnit}
6397
  @param lu: the logical unit for which we check
6398
  @type nodenames: list
6399
  @param nodenames: the list of nodes on which we should check
6400
  @type hvname: string
6401
  @param hvname: the name of the hypervisor we should use
6402
  @type hvparams: dict
6403
  @param hvparams: the parameters which we need to check
6404
  @raise errors.OpPrereqError: if the parameters are not valid
6405

6406
  """
6407
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6408
                                                  hvname,
6409
                                                  hvparams)
6410
  for node in nodenames:
6411
    info = hvinfo[node]
6412
    if info.offline:
6413
      continue
6414
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6415

    
6416

    
6417
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6418
  """OS parameters validation.
6419

6420
  @type lu: L{LogicalUnit}
6421
  @param lu: the logical unit for which we check
6422
  @type required: boolean
6423
  @param required: whether the validation should fail if the OS is not
6424
      found
6425
  @type nodenames: list
6426
  @param nodenames: the list of nodes on which we should check
6427
  @type osname: string
6428
  @param osname: the name of the OS we should use
6429
  @type osparams: dict
6430
  @param osparams: the parameters which we need to check
6431
  @raise errors.OpPrereqError: if the parameters are not valid
6432

6433
  """
6434
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6435
                                   [constants.OS_VALIDATE_PARAMETERS],
6436
                                   osparams)
6437
  for node, nres in result.items():
6438
    # we don't check for offline cases since this should be run only
6439
    # against the master node and/or an instance's nodes
6440
    nres.Raise("OS Parameters validation failed on node %s" % node)
6441
    if not nres.payload:
6442
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6443
                 osname, node)
6444

    
6445

    
6446
class LUCreateInstance(LogicalUnit):
6447
  """Create an instance.
6448

6449
  """
6450
  HPATH = "instance-add"
6451
  HTYPE = constants.HTYPE_INSTANCE
6452
  _OP_PARAMS = [
6453
    _PInstanceName,
6454
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6455
    ("start", True, _TBool),
6456
    ("wait_for_sync", True, _TBool),
6457
    ("ip_check", True, _TBool),
6458
    ("name_check", True, _TBool),
6459
    ("disks", _NoDefault, _TListOf(_TDict)),
6460
    ("nics", _NoDefault, _TListOf(_TDict)),
6461
    ("hvparams", _EmptyDict, _TDict),
6462
    ("beparams", _EmptyDict, _TDict),
6463
    ("osparams", _EmptyDict, _TDict),
6464
    ("no_install", None, _TMaybeBool),
6465
    ("os_type", None, _TMaybeString),
6466
    ("force_variant", False, _TBool),
6467
    ("source_handshake", None, _TOr(_TList, _TNone)),
6468
    ("source_x509_ca", None, _TOr(_TList, _TNone)),
6469
    ("source_instance_name", None, _TMaybeString),
6470
    ("src_node", None, _TMaybeString),
6471
    ("src_path", None, _TMaybeString),
6472
    ("pnode", None, _TMaybeString),
6473
    ("snode", None, _TMaybeString),
6474
    ("iallocator", None, _TMaybeString),
6475
    ("hypervisor", None, _TMaybeString),
6476
    ("disk_template", _NoDefault, _CheckDiskTemplate),
6477
    ("identify_defaults", False, _TBool),
6478
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6479
    ("file_storage_dir", None, _TMaybeString),
6480
    ("dry_run", False, _TBool),
6481
    ]
6482
  REQ_BGL = False
6483

    
6484
  def CheckArguments(self):
6485
    """Check arguments.
6486

6487
    """
6488
    # do not require name_check to ease forward/backward compatibility
6489
    # for tools
6490
    if self.op.no_install and self.op.start:
6491
      self.LogInfo("No-installation mode selected, disabling startup")
6492
      self.op.start = False
6493
    # validate/normalize the instance name
6494
    self.op.instance_name = \
6495
      netutils.HostInfo.NormalizeName(self.op.instance_name)
6496

    
6497
    if self.op.ip_check and not self.op.name_check:
6498
      # TODO: make the ip check more flexible and not depend on the name check
6499
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6500
                                 errors.ECODE_INVAL)
6501

    
6502
    # check nics' parameter names
6503
    for nic in self.op.nics:
6504
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6505

    
6506
    # check disks. parameter names and consistent adopt/no-adopt strategy
6507
    has_adopt = has_no_adopt = False
6508
    for disk in self.op.disks:
6509
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6510
      if "adopt" in disk:
6511
        has_adopt = True
6512
      else:
6513
        has_no_adopt = True
6514
    if has_adopt and has_no_adopt:
6515
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6516
                                 errors.ECODE_INVAL)
6517
    if has_adopt:
6518
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6519
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6520
                                   " '%s' disk template" %
6521
                                   self.op.disk_template,
6522
                                   errors.ECODE_INVAL)
6523
      if self.op.iallocator is not None:
6524
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6525
                                   " iallocator script", errors.ECODE_INVAL)
6526
      if self.op.mode == constants.INSTANCE_IMPORT:
6527
        raise errors.OpPrereqError("Disk adoption not allowed for"
6528
                                   " instance import", errors.ECODE_INVAL)
6529

    
6530
    self.adopt_disks = has_adopt
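    # Adopted disks reuse existing volumes instead of creating new ones;
    # the choice is remembered for the later steps of this LU.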
6531

    
6532
    # instance name verification
6533
    if self.op.name_check:
6534
      self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6535
      self.op.instance_name = self.hostname1.name
6536
      # used in CheckPrereq for ip ping check
6537
      self.check_ip = self.hostname1.ip
6538
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6539
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6540
                                 errors.ECODE_INVAL)
6541
    else:
6542
      self.check_ip = None
6543

    
6544
    # file storage checks
6545
    if (self.op.file_driver and
6546
        not self.op.file_driver in constants.FILE_DRIVER):
6547
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6548
                                 self.op.file_driver, errors.ECODE_INVAL)
6549

    
6550
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6551
      raise errors.OpPrereqError("File storage directory path not absolute",
6552
                                 errors.ECODE_INVAL)
6553

    
6554
    ### Node/iallocator related checks
6555
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6556

    
6557
    self._cds = _GetClusterDomainSecret()
6558

    
6559
    if self.op.mode == constants.INSTANCE_IMPORT:
6560
      # On import force_variant must be True, because if we forced it at
6561
      # initial install, our only chance when importing it back is that it
6562
      # works again!
6563
      self.op.force_variant = True
6564

    
6565
      if self.op.no_install:
6566
        self.LogInfo("No-installation mode has no effect during import")
6567

    
6568
    elif self.op.mode == constants.INSTANCE_CREATE:
6569
      if self.op.os_type is None:
6570
        raise errors.OpPrereqError("No guest OS specified",
6571
                                   errors.ECODE_INVAL)
6572
      if self.op.disk_template is None:
6573
        raise errors.OpPrereqError("No disk template specified",
6574
                                   errors.ECODE_INVAL)
6575

    
6576
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6577
      # Check handshake to ensure both clusters have the same domain secret
6578
      src_handshake = self.op.source_handshake
6579
      if not src_handshake:
6580
        raise errors.OpPrereqError("Missing source handshake",
6581
                                   errors.ECODE_INVAL)
6582

    
6583
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6584
                                                           src_handshake)
6585
      if errmsg:
6586
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6587
                                   errors.ECODE_INVAL)
6588

    
6589
      # Load and check source CA
6590
      self.source_x509_ca_pem = self.op.source_x509_ca
6591
      if not self.source_x509_ca_pem:
6592
        raise errors.OpPrereqError("Missing source X509 CA",
6593
                                   errors.ECODE_INVAL)
6594

    
6595
      try:
6596
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6597
                                                    self._cds)
6598
      except OpenSSL.crypto.Error, err:
6599
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6600
                                   (err, ), errors.ECODE_INVAL)
6601

    
6602
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6603
      if errcode is not None:
6604
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6605
                                   errors.ECODE_INVAL)
6606

    
6607
      self.source_x509_ca = cert
6608

    
6609
      src_instance_name = self.op.source_instance_name
6610
      if not src_instance_name:
6611
        raise errors.OpPrereqError("Missing source instance name",
6612
                                   errors.ECODE_INVAL)
6613

    
6614
      norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6615
      self.source_instance_name = netutils.GetHostInfo(norm_name).name
6616

    
6617
    else:
6618
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6619
                                 self.op.mode, errors.ECODE_INVAL)
6620

    
6621
  def ExpandNames(self):
6622
    """ExpandNames for CreateInstance.
6623

6624
    Figure out the right locks for instance creation.
6625

6626
    """
6627
    self.needed_locks = {}
6628

    
6629
    instance_name = self.op.instance_name
6630
    # this is just a preventive check, but someone might still add this
6631
    # instance in the meantime, and creation will fail at lock-add time
6632
    if instance_name in self.cfg.GetInstanceList():
6633
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6634
                                 instance_name, errors.ECODE_EXISTS)
6635

    
6636
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6637

    
6638
    if self.op.iallocator:
6639
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6640
    else:
6641
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6642
      nodelist = [self.op.pnode]
6643
      if self.op.snode is not None:
6644
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6645
        nodelist.append(self.op.snode)
6646
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6647

    
6648
    # in case of import lock the source node too
6649
    if self.op.mode == constants.INSTANCE_IMPORT:
6650
      src_node = self.op.src_node
6651
      src_path = self.op.src_path
6652

    
6653
      if src_path is None:
6654
        self.op.src_path = src_path = self.op.instance_name
6655

    
6656
      if src_node is None:
6657
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6658
        self.op.src_node = None
6659
        if os.path.isabs(src_path):
6660
          raise errors.OpPrereqError("Importing an instance from an absolute"
6661
                                     " path requires a source node option.",
6662
                                     errors.ECODE_INVAL)
6663
      else:
6664
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6665
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6666
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6667
        if not os.path.isabs(src_path):
6668
          self.op.src_path = src_path = \
6669
            utils.PathJoin(constants.EXPORT_DIR, src_path)
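          # Relative import paths are looked up under the cluster-wide
          # export directory.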
6670

    
6671
  def _RunAllocator(self):
6672
    """Run the allocator based on input opcode.
6673

6674
    """
6675
    nics = [n.ToDict() for n in self.nics]
6676
    ial = IAllocator(self.cfg, self.rpc,
6677
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6678
                     name=self.op.instance_name,
6679
                     disk_template=self.op.disk_template,
6680
                     tags=[],
6681
                     os=self.op.os_type,
6682
                     vcpus=self.be_full[constants.BE_VCPUS],
6683
                     mem_size=self.be_full[constants.BE_MEMORY],
6684
                     disks=self.disks,
6685
                     nics=nics,
6686
                     hypervisor=self.op.hypervisor,
6687
                     )
6688

    
6689
    ial.Run(self.op.iallocator)
6690

    
6691
    if not ial.success:
6692
      raise errors.OpPrereqError("Can't compute nodes using"
6693
                                 " iallocator '%s': %s" %
6694
                                 (self.op.iallocator, ial.info),
6695
                                 errors.ECODE_NORES)
6696
    if len(ial.result) != ial.required_nodes:
6697
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6698
                                 " of nodes (%s), required %s" %
6699
                                 (self.op.iallocator, len(ial.result),
6700
                                  ial.required_nodes), errors.ECODE_FAULT)
6701
    self.op.pnode = ial.result[0]
6702
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6703
                 self.op.instance_name, self.op.iallocator,
6704
                 utils.CommaJoin(ial.result))
6705
    if ial.required_nodes == 2:
6706
      self.op.snode = ial.result[1]
6707

    
6708
  def BuildHooksEnv(self):
6709
    """Build hooks env.
6710

6711
    This runs on master, primary and secondary nodes of the instance.
6712

6713
    """
6714
    env = {
6715
      "ADD_MODE": self.op.mode,
6716
      }
6717
    if self.op.mode == constants.INSTANCE_IMPORT:
6718
      env["SRC_NODE"] = self.op.src_node
6719
      env["SRC_PATH"] = self.op.src_path
6720
      env["SRC_IMAGES"] = self.src_images
6721

    
6722
    env.update(_BuildInstanceHookEnv(
6723
      name=self.op.instance_name,
6724
      primary_node=self.op.pnode,
6725
      secondary_nodes=self.secondaries,
6726
      status=self.op.start,
6727
      os_type=self.op.os_type,
6728
      memory=self.be_full[constants.BE_MEMORY],
6729
      vcpus=self.be_full[constants.BE_VCPUS],
6730
      nics=_NICListToTuple(self, self.nics),
6731
      disk_template=self.op.disk_template,
6732
      disks=[(d["size"], d["mode"]) for d in self.disks],
6733
      bep=self.be_full,
6734
      hvp=self.hv_full,
6735
      hypervisor_name=self.op.hypervisor,
6736
    ))
6737

    
6738
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6739
          self.secondaries)
6740
    return env, nl, nl
6741

    
6742
  def _ReadExportInfo(self):
6743
    """Reads the export information from disk.
6744

6745
    It will override the opcode source node and path with the actual
6746
    information, if these two were not specified before.
6747

6748
    @return: the export information
6749

6750
    """
6751
    assert self.op.mode == constants.INSTANCE_IMPORT
6752

    
6753
    src_node = self.op.src_node
6754
    src_path = self.op.src_path
6755

    
6756
    if src_node is None:
6757
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6758
      exp_list = self.rpc.call_export_list(locked_nodes)
6759
      found = False
6760
      for node in exp_list:
6761
        if exp_list[node].fail_msg:
6762
          continue
6763
        if src_path in exp_list[node].payload:
6764
          found = True
6765
          self.op.src_node = src_node = node
6766
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6767
                                                       src_path)
6768
          break
6769
      if not found:
6770
        raise errors.OpPrereqError("No export found for relative path %s" %
6771
                                    src_path, errors.ECODE_INVAL)
6772

    
6773
    _CheckNodeOnline(self, src_node)
6774
    result = self.rpc.call_export_info(src_node, src_path)
6775
    result.Raise("No export or invalid export found in dir %s" % src_path)
6776

    
6777
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6778
    if not export_info.has_section(constants.INISECT_EXP):
6779
      raise errors.ProgrammerError("Corrupted export config",
6780
                                   errors.ECODE_ENVIRON)
6781

    
6782
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6783
    if (int(ei_version) != constants.EXPORT_VERSION):
6784
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6785
                                 (ei_version, constants.EXPORT_VERSION),
6786
                                 errors.ECODE_ENVIRON)
6787
    return export_info
6788

    
6789
  def _ReadExportParams(self, einfo):
6790
    """Use export parameters as defaults.
6791

6792
    In case the opcode doesn't specify (as in override) some instance
6793
    parameters, then try to use them from the export information, if
6794
    that declares them.
6795

6796
    """
6797
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6798

    
6799
    if self.op.disk_template is None:
6800
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6801
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6802
                                          "disk_template")
6803
      else:
6804
        raise errors.OpPrereqError("No disk template specified and the export"
6805
                                   " is missing the disk_template information",
6806
                                   errors.ECODE_INVAL)
6807

    
6808
    if not self.op.disks:
6809
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6810
        disks = []
6811
        # TODO: import the disk iv_name too
6812
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6813
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6814
          disks.append({"size": disk_sz})
6815
        self.op.disks = disks
6816
      else:
6817
        raise errors.OpPrereqError("No disk info specified and the export"
6818
                                   " is missing the disk information",
6819
                                   errors.ECODE_INVAL)
6820

    
6821
    if (not self.op.nics and
6822
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6823
      nics = []
6824
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6825
        ndict = {}
6826
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6827
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6828
          ndict[name] = v
6829
        nics.append(ndict)
6830
      self.op.nics = nics
6831

    
6832
    if (self.op.hypervisor is None and
6833
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6834
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6835
    if einfo.has_section(constants.INISECT_HYP):
6836
      # use the export parameters but do not override the ones
6837
      # specified by the user
6838
      for name, value in einfo.items(constants.INISECT_HYP):
6839
        if name not in self.op.hvparams:
6840
          self.op.hvparams[name] = value
6841

    
6842
    if einfo.has_section(constants.INISECT_BEP):
6843
      # use the parameters, without overriding
6844
      for name, value in einfo.items(constants.INISECT_BEP):
6845
        if name not in self.op.beparams:
6846
          self.op.beparams[name] = value
6847
    else:
6848
      # try to read the parameters old style, from the main section
6849
      for name in constants.BES_PARAMETERS:
6850
        if (name not in self.op.beparams and
6851
            einfo.has_option(constants.INISECT_INS, name)):
6852
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6853

    
6854
    if einfo.has_section(constants.INISECT_OSP):
6855
      # use the parameters, without overriding
6856
      for name, value in einfo.items(constants.INISECT_OSP):
6857
        if name not in self.op.osparams:
6858
          self.op.osparams[name] = value
6859

    
6860
  def _RevertToDefaults(self, cluster):
6861
    """Revert the instance parameters to the default values.
6862

6863
    """
6864
    # hvparams
6865
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6866
    for name in self.op.hvparams.keys():
6867
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6868
        del self.op.hvparams[name]
6869
    # beparams
6870
    be_defs = cluster.SimpleFillBE({})
6871
    for name in self.op.beparams.keys():
6872
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6873
        del self.op.beparams[name]
6874
    # nic params
6875
    nic_defs = cluster.SimpleFillNIC({})
6876
    for nic in self.op.nics:
6877
      for name in constants.NICS_PARAMETERS:
6878
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6879
          del nic[name]
6880
    # osparams
6881
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6882
    for name in self.op.osparams.keys():
6883
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6884
        del self.op.osparams[name]
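    # After this, the opcode parameters only contain values differing from
    # the cluster defaults, which is the point of "identify_defaults".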
6885

    
6886
  def CheckPrereq(self):
6887
    """Check prerequisites.
6888

6889
    """
6890
    if self.op.mode == constants.INSTANCE_IMPORT:
6891
      export_info = self._ReadExportInfo()
6892
      self._ReadExportParams(export_info)
6893

    
6894
    _CheckDiskTemplate(self.op.disk_template)
6895

    
6896
    if (not self.cfg.GetVGName() and
6897
        self.op.disk_template not in constants.DTS_NOT_LVM):
6898
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6899
                                 " instances", errors.ECODE_STATE)
6900

    
6901
    if self.op.hypervisor is None:
6902
      self.op.hypervisor = self.cfg.GetHypervisorType()
6903

    
6904
    cluster = self.cfg.GetClusterInfo()
6905
    enabled_hvs = cluster.enabled_hypervisors
6906
    if self.op.hypervisor not in enabled_hvs:
6907
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6908
                                 " cluster (%s)" % (self.op.hypervisor,
6909
                                  ",".join(enabled_hvs)),
6910
                                 errors.ECODE_STATE)
6911

    
6912
    # check hypervisor parameter syntax (locally)
6913
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6914
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6915
                                      self.op.hvparams)
6916
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6917
    hv_type.CheckParameterSyntax(filled_hvp)
6918
    self.hv_full = filled_hvp
6919
    # check that we don't specify global parameters on an instance
6920
    _CheckGlobalHvParams(self.op.hvparams)
6921

    
6922
    # fill and remember the beparams dict
6923
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6924
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6925

    
6926
    # build os parameters
6927
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6928

    
6929
    # now that hvp/bep are in final format, let's reset to defaults,
6930
    # if told to do so
6931
    if self.op.identify_defaults:
6932
      self._RevertToDefaults(cluster)
6933

    
6934
    # NIC buildup
6935
    self.nics = []
6936
    for idx, nic in enumerate(self.op.nics):
6937
      nic_mode_req = nic.get("mode", None)
6938
      nic_mode = nic_mode_req
6939
      if nic_mode is None:
6940
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6941

    
6942
      # in routed mode, for the first nic, the default ip is 'auto'
6943
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6944
        default_ip_mode = constants.VALUE_AUTO
6945
      else:
6946
        default_ip_mode = constants.VALUE_NONE
6947

    
6948
      # ip validity checks
6949
      ip = nic.get("ip", default_ip_mode)
6950
      if ip is None or ip.lower() == constants.VALUE_NONE:
6951
        nic_ip = None
6952
      elif ip.lower() == constants.VALUE_AUTO:
6953
        if not self.op.name_check:
6954
          raise errors.OpPrereqError("IP address set to auto but name checks"
6955
                                     " have been skipped. Aborting.",
6956
                                     errors.ECODE_INVAL)
6957
        nic_ip = self.hostname1.ip
6958
      else:
6959
        if not netutils.IsValidIP4(ip):
6960
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6961
                                     " like a valid IP" % ip,
6962
                                     errors.ECODE_INVAL)
6963
        nic_ip = ip
6964

    
6965
      # TODO: check the ip address for uniqueness
6966
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6967
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6968
                                   errors.ECODE_INVAL)
6969

    
6970
      # MAC address verification
6971
      mac = nic.get("mac", constants.VALUE_AUTO)
6972
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6973
        mac = utils.NormalizeAndValidateMac(mac)
6974

    
6975
        try:
6976
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6977
        except errors.ReservationError:
6978
          raise errors.OpPrereqError("MAC address %s already in use"
6979
                                     " in cluster" % mac,
6980
                                     errors.ECODE_NOTUNIQUE)
6981

    
6982
      # bridge verification
6983
      bridge = nic.get("bridge", None)
6984
      link = nic.get("link", None)
6985
      if bridge and link:
6986
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6987
                                   " at the same time", errors.ECODE_INVAL)
6988
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6989
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6990
                                   errors.ECODE_INVAL)
6991
      elif bridge:
6992
        link = bridge
6993

    
6994
      nicparams = {}
6995
      if nic_mode_req:
6996
        nicparams[constants.NIC_MODE] = nic_mode_req
6997
      if link:
6998
        nicparams[constants.NIC_LINK] = link
6999

    
7000
      check_params = cluster.SimpleFillNIC(nicparams)
7001
      objects.NIC.CheckParameterSyntax(check_params)
7002
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
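      # Only the explicitly requested NIC parameters are stored; the filled
      # version (check_params) is used solely for syntax validation here.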
7003

    
7004
    # disk checks/pre-build
7005
    self.disks = []
7006
    for disk in self.op.disks:
7007
      mode = disk.get("mode", constants.DISK_RDWR)
7008
      if mode not in constants.DISK_ACCESS_SET:
7009
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7010
                                   mode, errors.ECODE_INVAL)
7011
      size = disk.get("size", None)
7012
      if size is None:
7013
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7014
      try:
7015
        size = int(size)
7016
      except (TypeError, ValueError):
7017
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7018
                                   errors.ECODE_INVAL)
7019
      new_disk = {"size": size, "mode": mode}
7020
      if "adopt" in disk:
7021
        new_disk["adopt"] = disk["adopt"]
7022
      self.disks.append(new_disk)
7023

    
7024
    if self.op.mode == constants.INSTANCE_IMPORT:
7025

    
7026
      # Check that the new instance doesn't have less disks than the export
7027
      instance_disks = len(self.disks)
7028
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7029
      if instance_disks < export_disks:
7030
        raise errors.OpPrereqError("Not enough disks to import."
7031
                                   " (instance: %d, export: %d)" %
7032
                                   (instance_disks, export_disks),
7033
                                   errors.ECODE_INVAL)
7034

    
7035
      disk_images = []
7036
      for idx in range(export_disks):
7037
        option = 'disk%d_dump' % idx
7038
        if export_info.has_option(constants.INISECT_INS, option):
7039
          # FIXME: are the old os-es, disk sizes, etc. useful?
7040
          export_name = export_info.get(constants.INISECT_INS, option)
7041
          image = utils.PathJoin(self.op.src_path, export_name)
7042
          disk_images.append(image)
7043
        else:
7044
          disk_images.append(False)
7045

    
7046
      self.src_images = disk_images
7047

    
7048
      old_name = export_info.get(constants.INISECT_INS, 'name')
7049
      try:
7050
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7051
      except (TypeError, ValueError), err:
7052
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7053
                                   " an integer: %s" % str(err),
7054
                                   errors.ECODE_STATE)
7055
      if self.op.instance_name == old_name:
7056
        for idx, nic in enumerate(self.nics):
7057
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7058
            nic_mac_ini = 'nic%d_mac' % idx
7059
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7060

    
7061
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7062

    
7063
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7064
    if self.op.ip_check:
7065
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7066
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7067
                                   (self.check_ip, self.op.instance_name),
7068
                                   errors.ECODE_NOTUNIQUE)
7069

    
7070
    #### mac address generation
    # By generating the MAC address here, both the allocator and the hooks get
    # the real, final MAC address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the MAC is valid now, but we're not
    # sure it will still be when we actually add the instance. If things go
    # bad, adding the instance will abort because of a duplicate MAC, and the
    # creation job will fail.
7078
    for nic in self.nics:
7079
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7080
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7081

    
7082
    #### allocator run
7083

    
7084
    if self.op.iallocator is not None:
7085
      self._RunAllocator()
7086

    
7087
    #### node related checks
7088

    
7089
    # check primary node
7090
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7091
    assert self.pnode is not None, \
7092
      "Cannot retrieve locked node %s" % self.op.pnode
7093
    if pnode.offline:
7094
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7095
                                 pnode.name, errors.ECODE_STATE)
7096
    if pnode.drained:
7097
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7098
                                 pnode.name, errors.ECODE_STATE)
7099

    
7100
    self.secondaries = []
7101

    
7102
    # mirror node verification
7103
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7104
      if self.op.snode is None:
7105
        raise errors.OpPrereqError("The networked disk templates need"
7106
                                   " a mirror node", errors.ECODE_INVAL)
7107
      if self.op.snode == pnode.name:
7108
        raise errors.OpPrereqError("The secondary node cannot be the"
7109
                                   " primary node.", errors.ECODE_INVAL)
7110
      _CheckNodeOnline(self, self.op.snode)
7111
      _CheckNodeNotDrained(self, self.op.snode)
7112
      self.secondaries.append(self.op.snode)
7113

    
7114
    nodenames = [pnode.name] + self.secondaries
7115

    
7116
    req_size = _ComputeDiskSize(self.op.disk_template,
7117
                                self.disks)
7118

    
7119
    # Check lv size requirements, if not adopting
7120
    if req_size is not None and not self.adopt_disks:
7121
      _CheckNodesFreeDisk(self, nodenames, req_size)
7122

    
7123
    if self.adopt_disks: # instead, we must check the adoption data
7124
      all_lvs = set([i["adopt"] for i in self.disks])
7125
      if len(all_lvs) != len(self.disks):
7126
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7127
                                   errors.ECODE_INVAL)
7128
      for lv_name in all_lvs:
7129
        try:
7130
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7131
        except errors.ReservationError:
7132
          raise errors.OpPrereqError("LV named %s used by another instance" %
7133
                                     lv_name, errors.ECODE_NOTUNIQUE)
7134

    
7135
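      # list the LVs on the primary node so the adoption data can be checked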
      node_lvs = self.rpc.call_lv_list([pnode.name],
7136
                                       self.cfg.GetVGName())[pnode.name]
7137
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7138
      node_lvs = node_lvs.payload
7139
      delta = all_lvs.difference(node_lvs.keys())
7140
      if delta:
7141
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7142
                                   utils.CommaJoin(delta),
7143
                                   errors.ECODE_INVAL)
7144
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7145
      if online_lvs:
7146
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7147
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7148
                                   errors.ECODE_STATE)
7149
      # update the size of disk based on what is found
7150
      for dsk in self.disks:
7151
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7152

    
7153
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7154

    
7155
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7156
    # check OS parameters (remotely)
7157
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7158

    
7159
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7160

    
7161
    # memory check on primary node
7162
    if self.op.start:
7163
      _CheckNodeFreeMemory(self, self.pnode.name,
7164
                           "creating instance %s" % self.op.instance_name,
7165
                           self.be_full[constants.BE_MEMORY],
7166
                           self.op.hypervisor)
7167

    
7168
    self.dry_run_result = list(nodenames)
7169

    
7170
  def Exec(self, feedback_fn):
7171
    """Create and add the instance to the cluster.
7172

7173
    """
7174
    instance = self.op.instance_name
7175
    pnode_name = self.pnode.name
7176

    
7177
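    # some hypervisors need a cluster-unique TCP port reserved for the
    # instance (typically for console access), so allocate one if required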
    ht_kind = self.op.hypervisor
7178
    if ht_kind in constants.HTS_REQ_PORT:
7179
      network_port = self.cfg.AllocatePort()
7180
    else:
7181
      network_port = None
7182

    
7183
    if constants.ENABLE_FILE_STORAGE:
7184
      # this is needed because os.path.join does not accept None arguments
7185
      if self.op.file_storage_dir is None:
7186
        string_file_storage_dir = ""
7187
      else:
7188
        string_file_storage_dir = self.op.file_storage_dir
7189

    
7190
      # build the full file storage dir path
7191
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7192
                                        string_file_storage_dir, instance)
7193
    else:
7194
      file_storage_dir = ""
7195

    
7196
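    # build the disk objects for the requested disk template; nothing is
    # created on the nodes yet at this point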
    disks = _GenerateDiskTemplate(self,
7197
                                  self.op.disk_template,
7198
                                  instance, pnode_name,
7199
                                  self.secondaries,
7200
                                  self.disks,
7201
                                  file_storage_dir,
7202
                                  self.op.file_driver,
7203
                                  0)
7204

    
7205
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7206
                            primary_node=pnode_name,
7207
                            nics=self.nics, disks=disks,
7208
                            disk_template=self.op.disk_template,
7209
                            admin_up=False,
7210
                            network_port=network_port,
7211
                            beparams=self.op.beparams,
7212
                            hvparams=self.op.hvparams,
7213
                            hypervisor=self.op.hypervisor,
7214
                            osparams=self.op.osparams,
7215
                            )
7216

    
7217
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
7229
    else:
7230
      feedback_fn("* creating instance disks...")
7231
      try:
7232
        _CreateDisks(self, iobj)
7233
      except errors.OpExecError:
7234
        self.LogWarning("Device creation failed, reverting...")
7235
        try:
7236
          _RemoveDisks(self, iobj)
7237
        finally:
7238
          self.cfg.ReleaseDRBDMinors(instance)
7239
          raise
7240

    
7241
    feedback_fn("adding instance %s to cluster config" % instance)
7242

    
7243
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7244

    
7245
    # Declare that we don't want to remove the instance lock anymore, as we've
7246
    # added the instance to the config
7247
    del self.remove_locks[locking.LEVEL_INSTANCE]
7248
    # Unlock all the nodes
7249
    if self.op.mode == constants.INSTANCE_IMPORT:
7250
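      # keep the lock on the source node, it is still needed for the
      # disk import below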
      nodes_keep = [self.op.src_node]
7251
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7252
                       if node != self.op.src_node]
7253
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7254
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7255
    else:
7256
      self.context.glm.release(locking.LEVEL_NODE)
7257
      del self.acquired_locks[locking.LEVEL_NODE]
7258

    
7259
    if self.op.wait_for_sync:
7260
      disk_abort = not _WaitForSync(self, iobj)
7261
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7262
      # make sure the disks are not degraded (still sync-ing is ok)
7263
      time.sleep(15)
7264
      feedback_fn("* checking mirrors status")
7265
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7266
    else:
7267
      disk_abort = False
7268

    
7269
    if disk_abort:
7270
      _RemoveDisks(self, iobj)
7271
      self.cfg.RemoveInstance(iobj.name)
7272
      # Make sure the instance lock gets removed
7273
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7274
      raise errors.OpExecError("There are some degraded disks for"
7275
                               " this instance")
7276

    
7277
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7278
      if self.op.mode == constants.INSTANCE_CREATE:
7279
        if not self.op.no_install:
7280
          feedback_fn("* running the instance OS create scripts...")
7281
          # FIXME: pass debug option from opcode to backend
7282
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7283
                                                 self.op.debug_level)
7284
          result.Raise("Could not add os for instance %s"
7285
                       " on node %s" % (instance, pnode_name))
7286

    
7287
      elif self.op.mode == constants.INSTANCE_IMPORT:
7288
        feedback_fn("* running the instance OS import scripts...")
7289

    
7290
        transfers = []
7291

    
7292
        for idx, image in enumerate(self.src_images):
7293
          if not image:
7294
            continue
7295

    
7296
          # FIXME: pass debug option from opcode to backend
7297
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7298
                                             constants.IEIO_FILE, (image, ),
7299
                                             constants.IEIO_SCRIPT,
7300
                                             (iobj.disks[idx], idx),
7301
                                             None)
7302
          transfers.append(dt)
7303

    
7304
        import_result = \
7305
          masterd.instance.TransferInstanceData(self, feedback_fn,
7306
                                                self.op.src_node, pnode_name,
7307
                                                self.pnode.secondary_ip,
7308
                                                iobj, transfers)
7309
        if not compat.all(import_result):
7310
          self.LogWarning("Some disks for instance %s on node %s were not"
7311
                          " imported successfully" % (instance, pnode_name))
7312

    
7313
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7314
        feedback_fn("* preparing remote import...")
7315
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7316
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7317

    
7318
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7319
                                                     self.source_x509_ca,
7320
                                                     self._cds, timeouts)
7321
        if not compat.all(disk_results):
7322
          # TODO: Should the instance still be started, even if some disks
7323
          # failed to import (valid for local imports, too)?
7324
          self.LogWarning("Some disks for instance %s on node %s were not"
7325
                          " imported successfully" % (instance, pnode_name))
7326

    
7327
        # Run rename script on newly imported instance
7328
        assert iobj.name == instance
7329
        feedback_fn("Running rename script for %s" % instance)
7330
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7331
                                                   self.source_instance_name,
7332
                                                   self.op.debug_level)
7333
        if result.fail_msg:
7334
          self.LogWarning("Failed to run rename script for %s on node"
7335
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7336

    
7337
      else:
7338
        # also checked in the prereq part
7339
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7340
                                     % self.op.mode)
7341

    
7342
    if self.op.start:
7343
      iobj.admin_up = True
7344
      self.cfg.Update(iobj, feedback_fn)
7345
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7346
      feedback_fn("* starting instance...")
7347
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7348
      result.Raise("Could not start instance")
7349

    
7350
    return list(iobj.all_nodes)
7351

    
7352

    
7353
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False
7365

    
7366
  def ExpandNames(self):
7367
    self._ExpandAndLockInstance()
7368

    
7369
  def CheckPrereq(self):
7370
    """Check prerequisites.
7371

7372
    This checks that the instance is in the cluster.
7373

7374
    """
7375
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7376
    assert self.instance is not None, \
7377
      "Cannot retrieve locked instance %s" % self.op.instance_name
7378
    _CheckNodeOnline(self, self.instance.primary_node)
7379

    
7380
  def Exec(self, feedback_fn):
7381
    """Connect to the console of an instance
7382

7383
    """
7384
    instance = self.instance
7385
    node = instance.primary_node
7386

    
7387
    node_insts = self.rpc.call_instance_list([node],
7388
                                             [instance.hypervisor])[node]
7389
    node_insts.Raise("Can't get node information from %s" % node)
7390

    
7391
    if instance.name not in node_insts.payload:
7392
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7393

    
7394
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7395

    
7396
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7397
    cluster = self.cfg.GetClusterInfo()
7398
    # beparams and hvparams are passed separately, to avoid editing the
7399
    # instance and then saving the defaults in the instance itself.
7400
    hvparams = cluster.FillHV(instance)
7401
    beparams = cluster.FillBE(instance)
7402
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7403

    
7404
    # build ssh cmdline
7405
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7406

    
7407

    
7408
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("early_release", False, _TBool),
    ]
  REQ_BGL = False
7423

    
7424
  def CheckArguments(self):
7425
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7426
                                  self.op.iallocator)
7427

    
7428
  def ExpandNames(self):
7429
    self._ExpandAndLockInstance()
7430

    
7431
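    # with an iallocator the new secondary is not known yet, so all nodes
    # must be locked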
    if self.op.iallocator is not None:
7432
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7433

    
7434
    elif self.op.remote_node is not None:
7435
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7436
      self.op.remote_node = remote_node
7437

    
7438
      # Warning: do not remove the locking of the new secondary here
7439
      # unless DRBD8.AddChildren is changed to work in parallel;
7440
      # currently it doesn't since parallel invocations of
7441
      # FindUnusedMinor will conflict
7442
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7443
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7444

    
7445
    else:
7446
      self.needed_locks[locking.LEVEL_NODE] = []
7447
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7448

    
7449
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7450
                                   self.op.iallocator, self.op.remote_node,
7451
                                   self.op.disks, False, self.op.early_release)
7452

    
7453
    self.tasklets = [self.replacer]
7454

    
7455
  def DeclareLocks(self, level):
7456
    # If we're not already locking all nodes in the set we have to declare the
7457
    # instance's primary/secondary nodes.
7458
    if (level == locking.LEVEL_NODE and
7459
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7460
      self._LockInstancesNodes()
7461

    
7462
  def BuildHooksEnv(self):
7463
    """Build hooks env.
7464

7465
    This runs on the master, the primary and all the secondaries.
7466

7467
    """
7468
    instance = self.replacer.instance
7469
    env = {
7470
      "MODE": self.op.mode,
7471
      "NEW_SECONDARY": self.op.remote_node,
7472
      "OLD_SECONDARY": instance.secondary_nodes[0],
7473
      }
7474
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7475
    nl = [
7476
      self.cfg.GetMasterNode(),
7477
      instance.primary_node,
7478
      ]
7479
    if self.op.remote_node is not None:
7480
      nl.append(self.op.remote_node)
7481
    return env, nl, nl
7482

    
7483

    
7484
class TLReplaceDisks(Tasklet):
7485
  """Replaces disks for an instance.
7486

7487
  Note: Locking is not within the scope of this class.
7488

7489
  """
7490
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7491
               disks, delay_iallocator, early_release):
7492
    """Initializes this class.
7493

7494
    """
7495
    Tasklet.__init__(self, lu)
7496

    
7497
    # Parameters
7498
    self.instance_name = instance_name
7499
    self.mode = mode
7500
    self.iallocator_name = iallocator_name
7501
    self.remote_node = remote_node
7502
    self.disks = disks
7503
    self.delay_iallocator = delay_iallocator
7504
    self.early_release = early_release
7505

    
7506
    # Runtime data
7507
    self.instance = None
7508
    self.new_node = None
7509
    self.target_node = None
7510
    self.other_node = None
7511
    self.remote_node_info = None
7512
    self.node_secondary_ip = None
7513

    
7514
  @staticmethod
7515
  def CheckArguments(mode, remote_node, iallocator):
7516
    """Helper function for users of this class.
7517

7518
    """
7519
    # check for valid parameter combination
7520
    if mode == constants.REPLACE_DISK_CHG:
7521
      if remote_node is None and iallocator is None:
7522
        raise errors.OpPrereqError("When changing the secondary either an"
7523
                                   " iallocator script must be used or the"
7524
                                   " new node given", errors.ECODE_INVAL)
7525

    
7526
      if remote_node is not None and iallocator is not None:
7527
        raise errors.OpPrereqError("Give either the iallocator or the new"
7528
                                   " secondary, not both", errors.ECODE_INVAL)
7529

    
7530
    elif remote_node is not None or iallocator is not None:
7531
      # Not replacing the secondary
7532
      raise errors.OpPrereqError("The iallocator and new node options can"
7533
                                 " only be used when changing the"
7534
                                 " secondary node", errors.ECODE_INVAL)
7535

    
7536
  @staticmethod
7537
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7538
    """Compute a new secondary node using an IAllocator.
7539

7540
    """
7541
    ial = IAllocator(lu.cfg, lu.rpc,
7542
                     mode=constants.IALLOCATOR_MODE_RELOC,
7543
                     name=instance_name,
7544
                     relocate_from=relocate_from)
7545

    
7546
    ial.Run(iallocator_name)
7547

    
7548
    if not ial.success:
7549
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7550
                                 " %s" % (iallocator_name, ial.info),
7551
                                 errors.ECODE_NORES)
7552

    
7553
    if len(ial.result) != ial.required_nodes:
7554
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7555
                                 " of nodes (%s), required %s" %
7556
                                 (iallocator_name,
7557
                                  len(ial.result), ial.required_nodes),
7558
                                 errors.ECODE_FAULT)
7559

    
7560
    remote_node_name = ial.result[0]
7561

    
7562
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7563
               instance_name, remote_node_name)
7564

    
7565
    return remote_node_name
7566

    
7567
  def _FindFaultyDisks(self, node_name):
7568
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7569
                                    node_name, True)
7570

    
7571
  def CheckPrereq(self):
7572
    """Check prerequisites.
7573

7574
    This checks that the instance is in the cluster.
7575

7576
    """
7577
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7578
    assert instance is not None, \
7579
      "Cannot retrieve locked instance %s" % self.instance_name
7580

    
7581
    if instance.disk_template != constants.DT_DRBD8:
7582
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7583
                                 " instances", errors.ECODE_INVAL)
7584

    
7585
    if len(instance.secondary_nodes) != 1:
7586
      raise errors.OpPrereqError("The instance has a strange layout,"
7587
                                 " expected one secondary but found %d" %
7588
                                 len(instance.secondary_nodes),
7589
                                 errors.ECODE_FAULT)
7590

    
7591
    if not self.delay_iallocator:
7592
      self._CheckPrereq2()
7593

    
7594
  def _CheckPrereq2(self):
7595
    """Check prerequisites, second part.
7596

7597
    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because, during node evacuation, the iallocator would
    otherwise only be called with an unmodified cluster model, not taking
    planned changes into account.
7601

7602
    """
7603
    instance = self.instance
7604
    secondary_node = instance.secondary_nodes[0]
7605

    
7606
    if self.iallocator_name is None:
7607
      remote_node = self.remote_node
7608
    else:
7609
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7610
                                       instance.name, instance.secondary_nodes)
7611

    
7612
    if remote_node is not None:
7613
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7614
      assert self.remote_node_info is not None, \
7615
        "Cannot retrieve locked node %s" % remote_node
7616
    else:
7617
      self.remote_node_info = None
7618

    
7619
    if remote_node == self.instance.primary_node:
7620
      raise errors.OpPrereqError("The specified node is the primary node of"
7621
                                 " the instance.", errors.ECODE_INVAL)
7622

    
7623
    if remote_node == secondary_node:
7624
      raise errors.OpPrereqError("The specified node is already the"
7625
                                 " secondary node of the instance.",
7626
                                 errors.ECODE_INVAL)
7627

    
7628
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7629
                                    constants.REPLACE_DISK_CHG):
7630
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7631
                                 errors.ECODE_INVAL)
7632

    
7633
    if self.mode == constants.REPLACE_DISK_AUTO:
7634
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7635
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7636

    
7637
      if faulty_primary and faulty_secondary:
7638
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7639
                                   " one node and can not be repaired"
7640
                                   " automatically" % self.instance_name,
7641
                                   errors.ECODE_STATE)
7642

    
7643
      if faulty_primary:
7644
        self.disks = faulty_primary
7645
        self.target_node = instance.primary_node
7646
        self.other_node = secondary_node
7647
        check_nodes = [self.target_node, self.other_node]
7648
      elif faulty_secondary:
7649
        self.disks = faulty_secondary
7650
        self.target_node = secondary_node
7651
        self.other_node = instance.primary_node
7652
        check_nodes = [self.target_node, self.other_node]
7653
      else:
7654
        self.disks = []
7655
        check_nodes = []
7656

    
7657
    else:
7658
      # Non-automatic modes
7659
      if self.mode == constants.REPLACE_DISK_PRI:
7660
        self.target_node = instance.primary_node
7661
        self.other_node = secondary_node
7662
        check_nodes = [self.target_node, self.other_node]
7663

    
7664
      elif self.mode == constants.REPLACE_DISK_SEC:
7665
        self.target_node = secondary_node
7666
        self.other_node = instance.primary_node
7667
        check_nodes = [self.target_node, self.other_node]
7668

    
7669
      elif self.mode == constants.REPLACE_DISK_CHG:
7670
        self.new_node = remote_node
7671
        self.other_node = instance.primary_node
7672
        self.target_node = secondary_node
7673
        check_nodes = [self.new_node, self.other_node]
7674

    
7675
        _CheckNodeNotDrained(self.lu, remote_node)
7676

    
7677
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7678
        assert old_node_info is not None
7679
        if old_node_info.offline and not self.early_release:
7680
          # doesn't make sense to delay the release
7681
          self.early_release = True
7682
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7683
                          " early-release mode", secondary_node)
7684

    
7685
      else:
7686
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7687
                                     self.mode)
7688

    
7689
      # If not specified all disks should be replaced
7690
      if not self.disks:
7691
        self.disks = range(len(self.instance.disks))
7692

    
7693
    for node in check_nodes:
7694
      _CheckNodeOnline(self.lu, node)
7695

    
7696
    # Check whether disks are valid
7697
    for disk_idx in self.disks:
7698
      instance.FindDisk(disk_idx)
7699

    
7700
    # Get secondary node IP addresses
7701
    node_2nd_ip = {}
7702

    
7703
    for node_name in [self.target_node, self.other_node, self.new_node]:
7704
      if node_name is not None:
7705
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7706

    
7707
    self.node_secondary_ip = node_2nd_ip
7708

    
7709
  def Exec(self, feedback_fn):
7710
    """Execute disk replacement.
7711

7712
    This dispatches the disk replacement to the appropriate handler.
7713

7714
    """
7715
    if self.delay_iallocator:
7716
      self._CheckPrereq2()
7717

    
7718
    if not self.disks:
7719
      feedback_fn("No disks need replacement")
7720
      return
7721

    
7722
    feedback_fn("Replacing disk(s) %s for %s" %
7723
                (utils.CommaJoin(self.disks), self.instance.name))
7724

    
7725
    activate_disks = (not self.instance.admin_up)
7726

    
7727
    # Activate the instance disks if we're replacing them on a down instance
7728
    if activate_disks:
7729
      _StartInstanceDisks(self.lu, self.instance, True)
7730

    
7731
    try:
7732
      # Should we replace the secondary node?
7733
      if self.new_node is not None:
7734
        fn = self._ExecDrbd8Secondary
7735
      else:
7736
        fn = self._ExecDrbd8DiskOnly
7737

    
7738
      return fn(feedback_fn)
7739

    
7740
    finally:
7741
      # Deactivate the instance disks if we're replacing them on a
7742
      # down instance
7743
      if activate_disks:
7744
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7745

    
7746
  def _CheckVolumeGroup(self, nodes):
7747
    self.lu.LogInfo("Checking volume groups")
7748

    
7749
    vgname = self.cfg.GetVGName()
7750

    
7751
    # Make sure volume group exists on all involved nodes
7752
    results = self.rpc.call_vg_list(nodes)
7753
    if not results:
7754
      raise errors.OpExecError("Can't list volume groups on the nodes")
7755

    
7756
    for node in nodes:
7757
      res = results[node]
7758
      res.Raise("Error checking node %s" % node)
7759
      if vgname not in res.payload:
7760
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7761
                                 (vgname, node))
7762

    
7763
  def _CheckDisksExistence(self, nodes):
7764
    # Check disk existence
7765
    for idx, dev in enumerate(self.instance.disks):
7766
      if idx not in self.disks:
7767
        continue
7768

    
7769
      for node in nodes:
7770
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7771
        self.cfg.SetDiskID(dev, node)
7772

    
7773
        result = self.rpc.call_blockdev_find(node, dev)
7774

    
7775
        msg = result.fail_msg
7776
        if msg or not result.payload:
7777
          if not msg:
7778
            msg = "disk not found"
7779
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7780
                                   (idx, node, msg))
7781

    
7782
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7783
    for idx, dev in enumerate(self.instance.disks):
7784
      if idx not in self.disks:
7785
        continue
7786

    
7787
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7788
                      (idx, node_name))
7789

    
7790
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7791
                                   ldisk=ldisk):
7792
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7793
                                 " replace disks for instance %s" %
7794
                                 (node_name, self.instance.name))
7795

    
7796
  def _CreateNewStorage(self, node_name):
7797
    vgname = self.cfg.GetVGName()
7798
    iv_names = {}
7799

    
7800
    for idx, dev in enumerate(self.instance.disks):
7801
      if idx not in self.disks:
7802
        continue
7803

    
7804
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7805

    
7806
      self.cfg.SetDiskID(dev, node_name)
7807

    
7808
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7809
      names = _GenerateUniqueNames(self.lu, lv_names)
7810

    
7811
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7812
                             logical_id=(vgname, names[0]))
7813
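      # the second, small LV (size in MiB) will hold the DRBD metadata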
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7814
                             logical_id=(vgname, names[1]))
7815

    
7816
      new_lvs = [lv_data, lv_meta]
7817
      old_lvs = dev.children
7818
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7819

    
7820
      # we pass force_create=True to force the LVM creation
7821
      for new_lv in new_lvs:
7822
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7823
                        _GetInstanceInfoText(self.instance), False)
7824

    
7825
    return iv_names
7826

    
7827
  def _CheckDevices(self, node_name, iv_names):
7828
    for name, (dev, _, _) in iv_names.iteritems():
7829
      self.cfg.SetDiskID(dev, node_name)
7830

    
7831
      result = self.rpc.call_blockdev_find(node_name, dev)
7832

    
7833
      msg = result.fail_msg
7834
      if msg or not result.payload:
7835
        if not msg:
7836
          msg = "disk not found"
7837
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7838
                                 (name, msg))
7839

    
7840
      if result.payload.is_degraded:
7841
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7842

    
7843
  def _RemoveOldStorage(self, node_name, iv_names):
7844
    for name, (_, old_lvs, _) in iv_names.iteritems():
7845
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7846

    
7847
      for lv in old_lvs:
7848
        self.cfg.SetDiskID(lv, node_name)
7849

    
7850
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7851
        if msg:
7852
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7853
                             hint="remove unused LVs manually")
7854

    
7855
  def _ReleaseNodeLock(self, node_name):
7856
    """Releases the lock for a given node."""
7857
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7858

    
7859
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7860
    """Replace a disk on the primary or secondary for DRBD 8.
7861

7862
    The algorithm for replace is quite complicated:
7863

7864
      1. for each disk to be replaced:
7865

7866
        1. create new LVs on the target node with unique names
7867
        1. detach old LVs from the drbd device
7868
        1. rename old LVs to name_replaced.<time_t>
7869
        1. rename new LVs to old LVs
7870
        1. attach the new LVs (with the old names now) to the drbd device
7871

7872
      1. wait for sync across all devices
7873

7874
      1. for each modified disk:
7875

7876
        1. remove old LVs (which have the name name_replaced.<time_t>)
7877

7878
    Failures are not very well handled.
7879

7880
    """
7881
    steps_total = 6
7882

    
7883
    # Step: check device activation
7884
    self.lu.LogStep(1, steps_total, "Check device existence")
7885
    self._CheckDisksExistence([self.other_node, self.target_node])
7886
    self._CheckVolumeGroup([self.target_node, self.other_node])
7887

    
7888
    # Step: check other node consistency
7889
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7890
    self._CheckDisksConsistency(self.other_node,
7891
                                self.other_node == self.instance.primary_node,
7892
                                False)
7893

    
7894
    # Step: create new storage
7895
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7896
    iv_names = self._CreateNewStorage(self.target_node)
7897

    
7898
    # Step: for each lv, detach+rename*2+attach
7899
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7900
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7901
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7902

    
7903
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7904
                                                     old_lvs)
7905
      result.Raise("Can't detach drbd from local storage on node"
7906
                   " %s for device %s" % (self.target_node, dev.iv_name))
7907
      #dev.children = []
7908
      #cfg.Update(instance)
7909

    
7910
      # ok, we created the new LVs, so now we know we have the needed
7911
      # storage; as such, we proceed on the target node to rename
7912
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7913
      # using the assumption that logical_id == physical_id (which in
7914
      # turn is the unique_id on that node)
7915

    
7916
      # FIXME(iustin): use a better name for the replaced LVs
7917
      temp_suffix = int(time.time())
7918
      ren_fn = lambda d, suff: (d.physical_id[0],
7919
                                d.physical_id[1] + "_replaced-%s" % suff)
7920

    
7921
      # Build the rename list based on what LVs exist on the node
7922
      rename_old_to_new = []
7923
      for to_ren in old_lvs:
7924
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7925
        if not result.fail_msg and result.payload:
7926
          # device exists
7927
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7928

    
7929
      self.lu.LogInfo("Renaming the old LVs on the target node")
7930
      result = self.rpc.call_blockdev_rename(self.target_node,
7931
                                             rename_old_to_new)
7932
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7933

    
7934
      # Now we rename the new LVs to the old LVs
7935
      self.lu.LogInfo("Renaming the new LVs on the target node")
7936
      rename_new_to_old = [(new, old.physical_id)
7937
                           for old, new in zip(old_lvs, new_lvs)]
7938
      result = self.rpc.call_blockdev_rename(self.target_node,
7939
                                             rename_new_to_old)
7940
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7941

    
7942
      for old, new in zip(old_lvs, new_lvs):
7943
        new.logical_id = old.logical_id
7944
        self.cfg.SetDiskID(new, self.target_node)
7945

    
7946
      for disk in old_lvs:
7947
        disk.logical_id = ren_fn(disk, temp_suffix)
7948
        self.cfg.SetDiskID(disk, self.target_node)
7949

    
7950
      # Now that the new lvs have the old name, we can add them to the device
7951
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7952
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7953
                                                  new_lvs)
7954
      msg = result.fail_msg
7955
      if msg:
7956
        for new_lv in new_lvs:
7957
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7958
                                               new_lv).fail_msg
7959
          if msg2:
7960
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7961
                               hint=("cleanup manually the unused logical"
7962
                                     "volumes"))
7963
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7964

    
7965
      dev.children = new_lvs
7966

    
7967
      self.cfg.Update(self.instance, feedback_fn)
7968

    
7969
    cstep = 5
7970
    if self.early_release:
7971
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7972
      cstep += 1
7973
      self._RemoveOldStorage(self.target_node, iv_names)
7974
      # WARNING: we release both node locks here, do not do other RPCs
7975
      # than WaitForSync to the primary node
7976
      self._ReleaseNodeLock([self.target_node, self.other_node])
7977

    
7978
    # Wait for sync
7979
    # This can fail as the old devices are degraded and _WaitForSync
7980
    # does a combined result over all disks, so we don't check its return value
7981
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7982
    cstep += 1
7983
    _WaitForSync(self.lu, self.instance)
7984

    
7985
    # Check all devices manually
7986
    self._CheckDevices(self.instance.primary_node, iv_names)
7987

    
7988
    # Step: remove old storage
7989
    if not self.early_release:
7990
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7991
      cstep += 1
7992
      self._RemoveOldStorage(self.target_node, iv_names)
7993

    
7994
  def _ExecDrbd8Secondary(self, feedback_fn):
7995
    """Replace the secondary node for DRBD 8.
7996

7997
    The algorithm for replace is quite complicated:
7998
      - for all disks of the instance:
7999
        - create new LVs on the new node with same names
8000
        - shutdown the drbd device on the old secondary
8001
        - disconnect the drbd network on the primary
8002
        - create the drbd device on the new secondary
8003
        - network attach the drbd on the primary, using an artifice:
8004
          the drbd code for Attach() will connect to the network if it
8005
          finds a device which is connected to the good local disks but
8006
          not network enabled
8007
      - wait for sync across all devices
8008
      - remove all disks from the old secondary
8009

8010
    Failures are not very well handled.
8011

8012
    """
8013
    steps_total = 6
8014

    
8015
    # Step: check device activation
8016
    self.lu.LogStep(1, steps_total, "Check device existence")
8017
    self._CheckDisksExistence([self.instance.primary_node])
8018
    self._CheckVolumeGroup([self.instance.primary_node])
8019

    
8020
    # Step: check other node consistency
8021
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8022
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8023

    
8024
    # Step: create new storage
8025
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8026
    for idx, dev in enumerate(self.instance.disks):
8027
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8028
                      (self.new_node, idx))
8029
      # we pass force_create=True to force LVM creation
8030
      for new_lv in dev.children:
8031
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8032
                        _GetInstanceInfoText(self.instance), False)
8033

    
8034
    # Step 4: drbd minors and drbd setup changes
8035
    # after this, we must manually remove the drbd minors on both the
8036
    # error and the success paths
8037
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8038
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8039
                                         for dev in self.instance.disks],
8040
                                        self.instance.name)
8041
    logging.debug("Allocated minors %r", minors)
8042

    
8043
    iv_names = {}
8044
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8045
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8046
                      (self.new_node, idx))
8047
      # create new devices on new_node; note that we create two IDs:
8048
      # one without port, so the drbd will be activated without
8049
      # networking information on the new node at this stage, and one
8050
      # with network, for the later activation in step 4
8051
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8052
      if self.instance.primary_node == o_node1:
8053
        p_minor = o_minor1
8054
      else:
8055
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8056
        p_minor = o_minor2
8057

    
8058
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8059
                      p_minor, new_minor, o_secret)
8060
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8061
                    p_minor, new_minor, o_secret)
8062

    
8063
      iv_names[idx] = (dev, dev.children, new_net_id)
8064
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8065
                    new_net_id)
8066
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8067
                              logical_id=new_alone_id,
8068
                              children=dev.children,
8069
                              size=dev.size)
8070
      try:
8071
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8072
                              _GetInstanceInfoText(self.instance), False)
8073
      except errors.GenericError:
8074
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8075
        raise
8076

    
8077
    # We have new devices, shutdown the drbd on the old secondary
8078
    for idx, dev in enumerate(self.instance.disks):
8079
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8080
      self.cfg.SetDiskID(dev, self.target_node)
8081
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8082
      if msg:
8083
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8084
                           "node: %s" % (idx, msg),
8085
                           hint=("Please cleanup this device manually as"
8086
                                 " soon as possible"))
8087

    
8088
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8089
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8090
                                               self.node_secondary_ip,
8091
                                               self.instance.disks)\
8092
                                              [self.instance.primary_node]
8093

    
8094
    msg = result.fail_msg
8095
    if msg:
8096
      # detaches didn't succeed (unlikely)
8097
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8098
      raise errors.OpExecError("Can't detach the disks from the network on"
8099
                               " old node: %s" % (msg,))
8100

    
8101
    # if we managed to detach at least one, we update all the disks of
8102
    # the instance to point to the new secondary
8103
    self.lu.LogInfo("Updating instance configuration")
8104
    for dev, _, new_logical_id in iv_names.itervalues():
8105
      dev.logical_id = new_logical_id
8106
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8107

    
8108
    self.cfg.Update(self.instance, feedback_fn)
8109

    
8110
    # and now perform the drbd attach
8111
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8112
                    " (standalone => connected)")
8113
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8114
                                            self.new_node],
8115
                                           self.node_secondary_ip,
8116
                                           self.instance.disks,
8117
                                           self.instance.name,
8118
                                           False)
8119
    for to_node, to_result in result.items():
8120
      msg = to_result.fail_msg
8121
      if msg:
8122
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8123
                           to_node, msg,
8124
                           hint=("please do a gnt-instance info to see the"
8125
                                 " status of disks"))
8126
    cstep = 5
8127
    if self.early_release:
8128
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8129
      cstep += 1
8130
      self._RemoveOldStorage(self.target_node, iv_names)
8131
      # WARNING: we release all node locks here, do not do other RPCs
8132
      # than WaitForSync to the primary node
8133
      self._ReleaseNodeLock([self.instance.primary_node,
8134
                             self.target_node,
8135
                             self.new_node])
8136

    
8137
    # Wait for sync
8138
    # This can fail as the old devices are degraded and _WaitForSync
8139
    # does a combined result over all disks, so we don't check its return value
8140
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8141
    cstep += 1
8142
    _WaitForSync(self.lu, self.instance)
8143

    
8144
    # Check all devices manually
8145
    self._CheckDevices(self.instance.primary_node, iv_names)
8146

    
8147
    # Step: remove old storage
8148
    if not self.early_release:
8149
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8150
      self._RemoveOldStorage(self.target_node, iv_names)
8151

    
8152

    
8153
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("ignore_consistency", False, _TBool),
    ]
  REQ_BGL = False
8164

    
8165
  def CheckArguments(self):
8166
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8167

    
8168
    storage_type = self.op.storage_type
8169

    
8170
    if (constants.SO_FIX_CONSISTENCY not in
8171
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8172
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8173
                                 " repaired" % storage_type,
8174
                                 errors.ECODE_INVAL)
8175

    
8176
  def ExpandNames(self):
8177
    self.needed_locks = {
8178
      locking.LEVEL_NODE: [self.op.node_name],
8179
      }
8180

    
8181
  def _CheckFaultyDisks(self, instance, node_name):
8182
    """Ensure faulty disks abort the opcode or at least warn."""
8183
    try:
8184
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8185
                                  node_name, True):
8186
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8187
                                   " node '%s'" % (instance.name, node_name),
8188
                                   errors.ECODE_STATE)
8189
    except errors.OpPrereqError, err:
8190
      if self.op.ignore_consistency:
8191
        self.proc.LogWarning(str(err.args[0]))
8192
      else:
8193
        raise
8194

    
8195
  def CheckPrereq(self):
8196
    """Check prerequisites.
8197

8198
    """
8199
    # Check whether any instance on this node has faulty disks
8200
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8201
      if not inst.admin_up:
8202
        continue
8203
      check_nodes = set(inst.all_nodes)
8204
      check_nodes.discard(self.op.node_name)
8205
      for inst_node_name in check_nodes:
8206
        self._CheckFaultyDisks(inst, inst_node_name)
8207

    
8208
  def Exec(self, feedback_fn):
8209
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8210
                (self.op.name, self.op.node_name))
8211

    
8212
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8213
    result = self.rpc.call_storage_execute(self.op.node_name,
8214
                                           self.op.storage_type, st_args,
8215
                                           self.op.name,
8216
                                           constants.SO_FIX_CONSISTENCY)
8217
    result.Raise("Failed to repair storage unit '%s' on %s" %
8218
                 (self.op.name, self.op.node_name))
8219

    
8220

    
8221
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ]
  REQ_BGL = False
8231

    
8232
  def CheckArguments(self):
8233
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8234

    
8235
  def ExpandNames(self):
8236
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8237
    self.needed_locks = locks = {}
8238
    if self.op.remote_node is None:
8239
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8240
    else:
8241
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8242
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8243

    
8244
  def Exec(self, feedback_fn):
8245
    if self.op.remote_node is not None:
8246
      instances = []
8247
      for node in self.op.nodes:
8248
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8249
      result = []
8250
      for i in instances:
8251
        if i.primary_node == self.op.remote_node:
8252
          raise errors.OpPrereqError("Node %s is the primary node of"
8253
                                     " instance %s, cannot use it as"
8254
                                     " secondary" %
8255
                                     (self.op.remote_node, i.name),
8256
                                     errors.ECODE_INVAL)
8257
        result.append([i.name, self.op.remote_node])
8258
    else:
8259
      ial = IAllocator(self.cfg, self.rpc,
8260
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8261
                       evac_nodes=self.op.nodes)
8262
      ial.Run(self.op.iallocator, validate=True)
8263
      if not ial.success:
8264
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8265
                                 errors.ECODE_NORES)
8266
      result = ial.result
8267
    return result
8268

    
8269

    
8270
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", _NoDefault, _TInt),
    ("amount", _NoDefault, _TInt),
    ("wait_for_sync", True, _TBool),
    ]
  REQ_BGL = False
8283

    
8284
  def ExpandNames(self):
8285
    self._ExpandAndLockInstance()
8286
    self.needed_locks[locking.LEVEL_NODE] = []
8287
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8288

    
8289
  def DeclareLocks(self, level):
8290
    if level == locking.LEVEL_NODE:
8291
      self._LockInstancesNodes()
8292

    
8293
  def BuildHooksEnv(self):
8294
    """Build hooks env.
8295

8296
    This runs on the master, the primary and all the secondaries.
8297

8298
    """
8299
    env = {
8300
      "DISK": self.op.disk,
8301
      "AMOUNT": self.op.amount,
8302
      }
8303
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8304
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8305
    return env, nl, nl
8306

    
8307
  def CheckPrereq(self):
8308
    """Check prerequisites.
8309

8310
    This checks that the instance is in the cluster.
8311

8312
    """
8313
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8314
    assert instance is not None, \
8315
      "Cannot retrieve locked instance %s" % self.op.instance_name
8316
    nodenames = list(instance.all_nodes)
8317
    for node in nodenames:
8318
      _CheckNodeOnline(self, node)
8319

    
8320
    self.instance = instance
8321

    
8322
    if instance.disk_template not in constants.DTS_GROWABLE:
8323
      raise errors.OpPrereqError("Instance's disk layout does not support"
8324
                                 " growing.", errors.ECODE_INVAL)
8325

    
8326
    self.disk = instance.FindDisk(self.op.disk)
8327

    
8328
    if instance.disk_template != constants.DT_FILE:
8329
      # TODO: check the free disk space for file, when that feature will be
8330
      # supported
8331
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8332

    
8333
  def Exec(self, feedback_fn):
8334
    """Execute disk grow.
8335

8336
    """
8337
    instance = self.instance
8338
    disk = self.disk
8339

    
8340
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8341
    if not disks_ok:
8342
      raise errors.OpExecError("Cannot activate block device to grow")
8343

    
8344
    for node in instance.all_nodes:
8345
      self.cfg.SetDiskID(disk, node)
8346
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8347
      result.Raise("Grow request failed to node %s" % node)
8348

    
8349
      # TODO: Rewrite code to work properly
8350
      # DRBD goes into sync mode for a short amount of time after executing the
8351
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8352
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8353
      # time is a work-around.
8354
      time.sleep(5)
8355

    
8356
    disk.RecordGrow(self.op.amount)
8357
    self.cfg.Update(instance, feedback_fn)
8358
    if self.op.wait_for_sync:
8359
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8360
      if disk_abort:
8361
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8362
                             " status.\nPlease check the instance.")
8363
      if not instance.admin_up:
8364
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8365
    elif not instance.admin_up:
8366
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8367
                           " not supposed to be running because no wait for"
8368
                           " sync mode was requested.")
8369

    
8370

    
8371
class LUQueryInstanceData(NoHooksLU):
8372
  """Query runtime instance data.
8373

8374
  """
8375
  _OP_PARAMS = [
8376
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8377
    ("static", False, _TBool),
8378
    ]
8379
  REQ_BGL = False
8380

    
8381
  def ExpandNames(self):
8382
    self.needed_locks = {}
8383
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8384

    
8385
    if self.op.instances:
8386
      self.wanted_names = []
8387
      for name in self.op.instances:
8388
        full_name = _ExpandInstanceName(self.cfg, name)
8389
        self.wanted_names.append(full_name)
8390
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8391
    else:
8392
      self.wanted_names = None
8393
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8394

    
8395
    self.needed_locks[locking.LEVEL_NODE] = []
8396
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8397

    
8398
  def DeclareLocks(self, level):
8399
    if level == locking.LEVEL_NODE:
8400
      self._LockInstancesNodes()
8401

    
8402
  def CheckPrereq(self):
8403
    """Check prerequisites.
8404

8405
    This only checks the optional instance list against the existing names.
8406

8407
    """
8408
    if self.wanted_names is None:
8409
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8410

    
8411
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8412
                             in self.wanted_names]
8413

    
8414
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8415
    """Returns the status of a block device
8416

8417
    """
8418
    if self.op.static or not node:
8419
      return None
8420

    
8421
    self.cfg.SetDiskID(dev, node)
8422

    
8423
    result = self.rpc.call_blockdev_find(node, dev)
8424
    if result.offline:
8425
      return None
8426

    
8427
    result.Raise("Can't compute disk status for %s" % instance_name)
8428

    
8429
    status = result.payload
8430
    if status is None:
8431
      return None
8432

    
8433
    return (status.dev_path, status.major, status.minor,
8434
            status.sync_percent, status.estimated_time,
8435
            status.is_degraded, status.ldisk_status)
8436

    
8437
  def _ComputeDiskStatus(self, instance, snode, dev):
8438
    """Compute block device status.
8439

8440
    """
8441
    if dev.dev_type in constants.LDS_DRBD:
8442
      # we change the snode then (otherwise we use the one passed in)
8443
      if dev.logical_id[0] == instance.primary_node:
8444
        snode = dev.logical_id[1]
8445
      else:
8446
        snode = dev.logical_id[0]
8447

    
8448
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8449
                                              instance.name, dev)
8450
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8451

    
8452
    if dev.children:
8453
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8454
                      for child in dev.children]
8455
    else:
8456
      dev_children = []
8457

    
8458
    data = {
8459
      "iv_name": dev.iv_name,
8460
      "dev_type": dev.dev_type,
8461
      "logical_id": dev.logical_id,
8462
      "physical_id": dev.physical_id,
8463
      "pstatus": dev_pstatus,
8464
      "sstatus": dev_sstatus,
8465
      "children": dev_children,
8466
      "mode": dev.mode,
8467
      "size": dev.size,
8468
      }
8469

    
8470
    return data
8471

    
8472
  def Exec(self, feedback_fn):
8473
    """Gather and return data"""
8474
    result = {}
8475

    
8476
    cluster = self.cfg.GetClusterInfo()
8477

    
8478
    for instance in self.wanted_instances:
8479
      if not self.op.static:
8480
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8481
                                                  instance.name,
8482
                                                  instance.hypervisor)
8483
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8484
        remote_info = remote_info.payload
8485
        if remote_info and "state" in remote_info:
8486
          remote_state = "up"
8487
        else:
8488
          remote_state = "down"
8489
      else:
8490
        remote_state = None
8491
      if instance.admin_up:
8492
        config_state = "up"
8493
      else:
8494
        config_state = "down"
8495

    
8496
      disks = [self._ComputeDiskStatus(instance, None, device)
8497
               for device in instance.disks]
8498

    
8499
      idict = {
8500
        "name": instance.name,
8501
        "config_state": config_state,
8502
        "run_state": remote_state,
8503
        "pnode": instance.primary_node,
8504
        "snodes": instance.secondary_nodes,
8505
        "os": instance.os,
8506
        # this happens to be the same format used for hooks
8507
        "nics": _NICListToTuple(self, instance.nics),
8508
        "disk_template": instance.disk_template,
8509
        "disks": disks,
8510
        "hypervisor": instance.hypervisor,
8511
        "network_port": instance.network_port,
8512
        "hv_instance": instance.hvparams,
8513
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8514
        "be_instance": instance.beparams,
8515
        "be_actual": cluster.FillBE(instance),
8516
        "os_instance": instance.osparams,
8517
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8518
        "serial_no": instance.serial_no,
8519
        "mtime": instance.mtime,
8520
        "ctime": instance.ctime,
8521
        "uuid": instance.uuid,
8522
        }
8523

    
8524
      result[instance.name] = idict
8525

    
8526
    return result
8527

    
8528

    
8529
class LUSetInstanceParams(LogicalUnit):
8530
  """Modifies an instances's parameters.
8531

8532
  """
8533
  HPATH = "instance-modify"
8534
  HTYPE = constants.HTYPE_INSTANCE
8535
  _OP_PARAMS = [
8536
    _PInstanceName,
8537
    ("nics", _EmptyList, _TList),
8538
    ("disks", _EmptyList, _TList),
8539
    ("beparams", _EmptyDict, _TDict),
8540
    ("hvparams", _EmptyDict, _TDict),
8541
    ("disk_template", None, _TMaybeString),
8542
    ("remote_node", None, _TMaybeString),
8543
    ("os_name", None, _TMaybeString),
8544
    ("force_variant", False, _TBool),
8545
    ("osparams", None, _TOr(_TDict, _TNone)),
8546
    _PForce,
8547
    ]
8548
  REQ_BGL = False
8549

    
8550
  def CheckArguments(self):
8551
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8552
            self.op.hvparams or self.op.beparams or self.op.os_name):
8553
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8554

    
8555
    if self.op.hvparams:
8556
      _CheckGlobalHvParams(self.op.hvparams)
8557

    
8558
    # Disk validation
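    # Each entry in self.op.disks is an (op, params) pair, where op is either
    # constants.DDM_ADD, constants.DDM_REMOVE or the index of an existing disk
    # to modify.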
8559
    disk_addremove = 0
8560
    for disk_op, disk_dict in self.op.disks:
8561
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8562
      if disk_op == constants.DDM_REMOVE:
8563
        disk_addremove += 1
8564
        continue
8565
      elif disk_op == constants.DDM_ADD:
8566
        disk_addremove += 1
8567
      else:
8568
        if not isinstance(disk_op, int):
8569
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8570
        if not isinstance(disk_dict, dict):
8571
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8572
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8573

    
8574
      if disk_op == constants.DDM_ADD:
8575
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8576
        if mode not in constants.DISK_ACCESS_SET:
8577
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8578
                                     errors.ECODE_INVAL)
8579
        size = disk_dict.get('size', None)
8580
        if size is None:
8581
          raise errors.OpPrereqError("Required disk parameter size missing",
8582
                                     errors.ECODE_INVAL)
8583
        try:
8584
          size = int(size)
8585
        except (TypeError, ValueError), err:
8586
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8587
                                     str(err), errors.ECODE_INVAL)
8588
        disk_dict['size'] = size
8589
      else:
8590
        # modification of disk
8591
        if 'size' in disk_dict:
8592
          raise errors.OpPrereqError("Disk size change not possible, use"
8593
                                     " grow-disk", errors.ECODE_INVAL)
8594

    
8595
    if disk_addremove > 1:
8596
      raise errors.OpPrereqError("Only one disk add or remove operation"
8597
                                 " supported at a time", errors.ECODE_INVAL)
8598

    
8599
    if self.op.disks and self.op.disk_template is not None:
8600
      raise errors.OpPrereqError("Disk template conversion and other disk"
8601
                                 " changes not supported at the same time",
8602
                                 errors.ECODE_INVAL)
8603

    
8604
    if self.op.disk_template:
8605
      _CheckDiskTemplate(self.op.disk_template)
8606
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8607
          self.op.remote_node is None):
8608
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8609
                                   " one requires specifying a secondary node",
8610
                                   errors.ECODE_INVAL)
8611

    
8612
    # NIC validation
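    # self.op.nics uses the same (op, params) convention as the disks above.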
8613
    nic_addremove = 0
8614
    for nic_op, nic_dict in self.op.nics:
8615
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8616
      if nic_op == constants.DDM_REMOVE:
8617
        nic_addremove += 1
8618
        continue
8619
      elif nic_op == constants.DDM_ADD:
8620
        nic_addremove += 1
8621
      else:
8622
        if not isinstance(nic_op, int):
8623
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8624
        if not isinstance(nic_dict, dict):
8625
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8626
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8627

    
8628
      # nic_dict should be a dict
8629
      nic_ip = nic_dict.get('ip', None)
8630
      if nic_ip is not None:
8631
        if nic_ip.lower() == constants.VALUE_NONE:
8632
          nic_dict['ip'] = None
8633
        else:
8634
          if not netutils.IsValidIP4(nic_ip):
8635
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8636
                                       errors.ECODE_INVAL)
8637

    
8638
      nic_bridge = nic_dict.get('bridge', None)
8639
      nic_link = nic_dict.get('link', None)
8640
      if nic_bridge and nic_link:
8641
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8642
                                   " at the same time", errors.ECODE_INVAL)
8643
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8644
        nic_dict['bridge'] = None
8645
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8646
        nic_dict['link'] = None
8647

    
8648
      if nic_op == constants.DDM_ADD:
8649
        nic_mac = nic_dict.get('mac', None)
8650
        if nic_mac is None:
8651
          nic_dict['mac'] = constants.VALUE_AUTO
8652

    
8653
      if 'mac' in nic_dict:
8654
        nic_mac = nic_dict['mac']
8655
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8656
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8657

    
8658
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8659
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8660
                                     " modifying an existing nic",
8661
                                     errors.ECODE_INVAL)
8662

    
8663
    if nic_addremove > 1:
8664
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8665
                                 " supported at a time", errors.ECODE_INVAL)
8666

    
8667
  def ExpandNames(self):
8668
    self._ExpandAndLockInstance()
8669
    self.needed_locks[locking.LEVEL_NODE] = []
8670
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8671

    
8672
  def DeclareLocks(self, level):
8673
    if level == locking.LEVEL_NODE:
8674
      self._LockInstancesNodes()
8675
      if self.op.disk_template and self.op.remote_node:
8676
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8677
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8678

    
8679
  def BuildHooksEnv(self):
8680
    """Build hooks env.
8681

8682
    This runs on the master, primary and secondaries.
8683

8684
    """
8685
    args = dict()
8686
    if constants.BE_MEMORY in self.be_new:
8687
      args['memory'] = self.be_new[constants.BE_MEMORY]
8688
    if constants.BE_VCPUS in self.be_new:
8689
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8690
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8691
    # information at all.
8692
    if self.op.nics:
8693
      args['nics'] = []
8694
      nic_override = dict(self.op.nics)
8695
      for idx, nic in enumerate(self.instance.nics):
8696
        if idx in nic_override:
8697
          this_nic_override = nic_override[idx]
8698
        else:
8699
          this_nic_override = {}
8700
        if 'ip' in this_nic_override:
8701
          ip = this_nic_override['ip']
8702
        else:
8703
          ip = nic.ip
8704
        if 'mac' in this_nic_override:
8705
          mac = this_nic_override['mac']
8706
        else:
8707
          mac = nic.mac
8708
        if idx in self.nic_pnew:
8709
          nicparams = self.nic_pnew[idx]
8710
        else:
8711
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8712
        mode = nicparams[constants.NIC_MODE]
8713
        link = nicparams[constants.NIC_LINK]
8714
        args['nics'].append((ip, mac, mode, link))
8715
      if constants.DDM_ADD in nic_override:
8716
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8717
        mac = nic_override[constants.DDM_ADD]['mac']
8718
        nicparams = self.nic_pnew[constants.DDM_ADD]
8719
        mode = nicparams[constants.NIC_MODE]
8720
        link = nicparams[constants.NIC_LINK]
8721
        args['nics'].append((ip, mac, mode, link))
8722
      elif constants.DDM_REMOVE in nic_override:
8723
        del args['nics'][-1]
8724

    
8725
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8726
    if self.op.disk_template:
8727
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8728
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8729
    return env, nl, nl
8730

    
8731
  def CheckPrereq(self):
8732
    """Check prerequisites.
8733

8734
    This only checks the instance list against the existing names.
8735

8736
    """
8737
    # checking the new params on the primary/secondary nodes
8738

    
8739
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8740
    cluster = self.cluster = self.cfg.GetClusterInfo()
8741
    assert self.instance is not None, \
8742
      "Cannot retrieve locked instance %s" % self.op.instance_name
8743
    pnode = instance.primary_node
8744
    nodelist = list(instance.all_nodes)
8745

    
8746
    # OS change
8747
    if self.op.os_name and not self.op.force:
8748
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8749
                      self.op.force_variant)
8750
      instance_os = self.op.os_name
8751
    else:
8752
      instance_os = instance.os
8753

    
8754
    if self.op.disk_template:
8755
      if instance.disk_template == self.op.disk_template:
8756
        raise errors.OpPrereqError("Instance already has disk template %s" %
8757
                                   instance.disk_template, errors.ECODE_INVAL)
8758

    
8759
      if (instance.disk_template,
8760
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8761
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8762
                                   " %s to %s" % (instance.disk_template,
8763
                                                  self.op.disk_template),
8764
                                   errors.ECODE_INVAL)
8765
      _CheckInstanceDown(self, instance, "cannot change disk template")
8766
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8767
        _CheckNodeOnline(self, self.op.remote_node)
8768
        _CheckNodeNotDrained(self, self.op.remote_node)
8769
        disks = [{"size": d.size} for d in instance.disks]
8770
        required = _ComputeDiskSize(self.op.disk_template, disks)
8771
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8772

    
8773
    # hvparams processing
8774
    if self.op.hvparams:
8775
      hv_type = instance.hypervisor
8776
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8777
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8778
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8779

    
8780
      # local check
8781
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8782
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8783
      self.hv_new = hv_new # the new actual values
8784
      self.hv_inst = i_hvdict # the new dict (without defaults)
8785
    else:
8786
      self.hv_new = self.hv_inst = {}
8787

    
8788
    # beparams processing
8789
    if self.op.beparams:
8790
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8791
                                   use_none=True)
8792
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8793
      be_new = cluster.SimpleFillBE(i_bedict)
8794
      self.be_new = be_new # the new actual values
8795
      self.be_inst = i_bedict # the new dict (without defaults)
8796
    else:
8797
      self.be_new = self.be_inst = {}
8798

    
8799
    # osparams processing
8800
    if self.op.osparams:
8801
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8802
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8803
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8804
      self.os_inst = i_osdict # the new dict (without defaults)
8805
    else:
8806
      self.os_new = self.os_inst = {}
8807

    
8808
    self.warn = []
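    # If BE_MEMORY is being changed and the force flag was not given, check
    # that the primary node reports enough free memory (hard error if not)
    # and, with auto_balance enabled, that each secondary could still host
    # the instance (warning only).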
8809

    
8810
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8811
      mem_check_list = [pnode]
8812
      if be_new[constants.BE_AUTO_BALANCE]:
8813
        # either we changed auto_balance to yes or it was from before
8814
        mem_check_list.extend(instance.secondary_nodes)
8815
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8816
                                                  instance.hypervisor)
8817
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8818
                                         instance.hypervisor)
8819
      pninfo = nodeinfo[pnode]
8820
      msg = pninfo.fail_msg
8821
      if msg:
8822
        # Assume the primary node is unreachable and go ahead
8823
        self.warn.append("Can't get info from primary node %s: %s" %
8824
                         (pnode, msg))
8825
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8826
        self.warn.append("Node data from primary node %s doesn't contain"
8827
                         " free memory information" % pnode)
8828
      elif instance_info.fail_msg:
8829
        self.warn.append("Can't get instance runtime information: %s" %
8830
                        instance_info.fail_msg)
8831
      else:
8832
        if instance_info.payload:
8833
          current_mem = int(instance_info.payload['memory'])
8834
        else:
8835
          # Assume instance not running
8836
          # (there is a slight race condition here, but it's not very probable,
8837
          # and we have no other way to check)
8838
          current_mem = 0
8839
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8840
                    pninfo.payload['memory_free'])
8841
        if miss_mem > 0:
8842
          raise errors.OpPrereqError("This change will prevent the instance"
8843
                                     " from starting, due to %d MB of memory"
8844
                                     " missing on its primary node" % miss_mem,
8845
                                     errors.ECODE_NORES)
8846

    
8847
      if be_new[constants.BE_AUTO_BALANCE]:
8848
        for node, nres in nodeinfo.items():
8849
          if node not in instance.secondary_nodes:
8850
            continue
8851
          msg = nres.fail_msg
8852
          if msg:
8853
            self.warn.append("Can't get info from secondary node %s: %s" %
8854
                             (node, msg))
8855
          elif not isinstance(nres.payload.get('memory_free', None), int):
8856
            self.warn.append("Secondary node %s didn't return free"
8857
                             " memory information" % node)
8858
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8859
            self.warn.append("Not enough memory to failover instance to"
8860
                             " secondary node %s" % node)
8861

    
8862
    # NIC processing
8863
    self.nic_pnew = {}
8864
    self.nic_pinst = {}
8865
    for nic_op, nic_dict in self.op.nics:
8866
      if nic_op == constants.DDM_REMOVE:
8867
        if not instance.nics:
8868
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8869
                                     errors.ECODE_INVAL)
8870
        continue
8871
      if nic_op != constants.DDM_ADD:
8872
        # an existing nic
8873
        if not instance.nics:
8874
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8875
                                     " no NICs" % nic_op,
8876
                                     errors.ECODE_INVAL)
8877
        if nic_op < 0 or nic_op >= len(instance.nics):
8878
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8879
                                     " are 0 to %d" %
8880
                                     (nic_op, len(instance.nics) - 1),
8881
                                     errors.ECODE_INVAL)
8882
        old_nic_params = instance.nics[nic_op].nicparams
8883
        old_nic_ip = instance.nics[nic_op].ip
8884
      else:
8885
        old_nic_params = {}
8886
        old_nic_ip = None
8887

    
8888
      update_params_dict = dict([(key, nic_dict[key])
8889
                                 for key in constants.NICS_PARAMETERS
8890
                                 if key in nic_dict])
8891

    
8892
      if 'bridge' in nic_dict:
8893
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8894

    
8895
      new_nic_params = _GetUpdatedParams(old_nic_params,
8896
                                         update_params_dict)
8897
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8898
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8899
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8900
      self.nic_pinst[nic_op] = new_nic_params
8901
      self.nic_pnew[nic_op] = new_filled_nic_params
8902
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8903

    
8904
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8905
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8906
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8907
        if msg:
8908
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8909
          if self.op.force:
8910
            self.warn.append(msg)
8911
          else:
8912
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8913
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8914
        if 'ip' in nic_dict:
8915
          nic_ip = nic_dict['ip']
8916
        else:
8917
          nic_ip = old_nic_ip
8918
        if nic_ip is None:
8919
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8920
                                     ' on a routed nic', errors.ECODE_INVAL)
8921
      if 'mac' in nic_dict:
8922
        nic_mac = nic_dict['mac']
8923
        if nic_mac is None:
8924
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8925
                                     errors.ECODE_INVAL)
8926
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8927
          # otherwise generate the mac
8928
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8929
        else:
8930
          # or validate/reserve the current one
8931
          try:
8932
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8933
          except errors.ReservationError:
8934
            raise errors.OpPrereqError("MAC address %s already in use"
8935
                                       " in cluster" % nic_mac,
8936
                                       errors.ECODE_NOTUNIQUE)
8937

    
8938
    # DISK processing
8939
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8940
      raise errors.OpPrereqError("Disk operations not supported for"
8941
                                 " diskless instances",
8942
                                 errors.ECODE_INVAL)
8943
    for disk_op, _ in self.op.disks:
8944
      if disk_op == constants.DDM_REMOVE:
8945
        if len(instance.disks) == 1:
8946
          raise errors.OpPrereqError("Cannot remove the last disk of"
8947
                                     " an instance", errors.ECODE_INVAL)
8948
        _CheckInstanceDown(self, instance, "cannot remove disks")
8949

    
8950
      if (disk_op == constants.DDM_ADD and
8951
          len(instance.nics) >= constants.MAX_DISKS):
8952
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8953
                                   " add more" % constants.MAX_DISKS,
8954
                                   errors.ECODE_STATE)
8955
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8956
        # an existing disk
8957
        if disk_op < 0 or disk_op >= len(instance.disks):
8958
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8959
                                     " are 0 to %d" %
8960
                                     (disk_op, len(instance.disks) - 1),
8961
                                     errors.ECODE_INVAL)
8962

    
8963
    return
8964

    
8965
  def _ConvertPlainToDrbd(self, feedback_fn):
8966
    """Converts an instance from plain to drbd.
8967

8968
    """
8969
    feedback_fn("Converting template to drbd")
8970
    instance = self.instance
8971
    pnode = instance.primary_node
8972
    snode = self.op.remote_node
8973

    
8974
    # create a fake disk info for _GenerateDiskTemplate
8975
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8976
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8977
                                      instance.name, pnode, [snode],
8978
                                      disk_info, None, None, 0)
8979
    info = _GetInstanceInfoText(instance)
8980
    feedback_fn("Creating aditional volumes...")
8981
    # first, create the missing data and meta devices
8982
    for disk in new_disks:
8983
      # unfortunately this is... not too nice
8984
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8985
                            info, True)
8986
      for child in disk.children:
8987
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8988
    # at this stage, all new LVs have been created, we can rename the
8989
    # old ones
8990
    feedback_fn("Renaming original volumes...")
8991
    rename_list = [(o, n.children[0].logical_id)
8992
                   for (o, n) in zip(instance.disks, new_disks)]
8993
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8994
    result.Raise("Failed to rename original LVs")
8995

    
8996
    feedback_fn("Initializing DRBD devices...")
8997
    # all child devices are in place, we can now create the DRBD devices
8998
    for disk in new_disks:
8999
      for node in [pnode, snode]:
9000
        f_create = node == pnode
9001
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9002

    
9003
    # at this point, the instance has been modified
9004
    instance.disk_template = constants.DT_DRBD8
9005
    instance.disks = new_disks
9006
    self.cfg.Update(instance, feedback_fn)
9007

    
9008
    # disks are created, waiting for sync
9009
    disk_abort = not _WaitForSync(self, instance)
9010
    if disk_abort:
9011
      raise errors.OpExecError("There are some degraded disks for"
9012
                               " this instance, please cleanup manually")
9013

    
9014
  def _ConvertDrbdToPlain(self, feedback_fn):
9015
    """Converts an instance from drbd to plain.
9016

9017
    """
9018
    instance = self.instance
9019
    assert len(instance.secondary_nodes) == 1
9020
    pnode = instance.primary_node
9021
    snode = instance.secondary_nodes[0]
9022
    feedback_fn("Converting template to plain")
9023

    
9024
    old_disks = instance.disks
9025
    new_disks = [d.children[0] for d in old_disks]
9026

    
9027
    # copy over size and mode
9028
    for parent, child in zip(old_disks, new_disks):
9029
      child.size = parent.size
9030
      child.mode = parent.mode
9031

    
9032
    # update instance structure
9033
    instance.disks = new_disks
9034
    instance.disk_template = constants.DT_PLAIN
9035
    self.cfg.Update(instance, feedback_fn)
9036

    
9037
    feedback_fn("Removing volumes on the secondary node...")
9038
    for disk in old_disks:
9039
      self.cfg.SetDiskID(disk, snode)
9040
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9041
      if msg:
9042
        self.LogWarning("Could not remove block device %s on node %s,"
9043
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9044

    
9045
    feedback_fn("Removing unneeded volumes on the primary node...")
9046
    for idx, disk in enumerate(old_disks):
9047
      meta = disk.children[1]
9048
      self.cfg.SetDiskID(meta, pnode)
9049
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9050
      if msg:
9051
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9052
                        " continuing anyway: %s", idx, pnode, msg)
9053

    
9054

    
9055
  def Exec(self, feedback_fn):
9056
    """Modifies an instance.
9057

9058
    All parameters take effect only at the next restart of the instance.
9059

9060
    """
9061
    # Process here the warnings from CheckPrereq, as we don't have a
9062
    # feedback_fn there.
9063
    for warn in self.warn:
9064
      feedback_fn("WARNING: %s" % warn)
9065

    
9066
    result = []
9067
    instance = self.instance
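    # Every change that gets applied is also appended to `result` as a
    # (parameter, value) pair; the list is returned to the caller at the end.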
9068
    # disk changes
9069
    for disk_op, disk_dict in self.op.disks:
9070
      if disk_op == constants.DDM_REMOVE:
9071
        # remove the last disk
9072
        device = instance.disks.pop()
9073
        device_idx = len(instance.disks)
9074
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9075
          self.cfg.SetDiskID(disk, node)
9076
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9077
          if msg:
9078
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9079
                            " continuing anyway", device_idx, node, msg)
9080
        result.append(("disk/%d" % device_idx, "remove"))
9081
      elif disk_op == constants.DDM_ADD:
9082
        # add a new disk
9083
        if instance.disk_template == constants.DT_FILE:
9084
          file_driver, file_path = instance.disks[0].logical_id
9085
          file_path = os.path.dirname(file_path)
9086
        else:
9087
          file_driver = file_path = None
9088
        disk_idx_base = len(instance.disks)
9089
        new_disk = _GenerateDiskTemplate(self,
9090
                                         instance.disk_template,
9091
                                         instance.name, instance.primary_node,
9092
                                         instance.secondary_nodes,
9093
                                         [disk_dict],
9094
                                         file_path,
9095
                                         file_driver,
9096
                                         disk_idx_base)[0]
9097
        instance.disks.append(new_disk)
9098
        info = _GetInstanceInfoText(instance)
9099

    
9100
        logging.info("Creating volume %s for instance %s",
9101
                     new_disk.iv_name, instance.name)
9102
        # Note: this needs to be kept in sync with _CreateDisks
9103
        #HARDCODE
9104
        for node in instance.all_nodes:
9105
          f_create = node == instance.primary_node
9106
          try:
9107
            _CreateBlockDev(self, node, instance, new_disk,
9108
                            f_create, info, f_create)
9109
          except errors.OpExecError, err:
9110
            self.LogWarning("Failed to create volume %s (%s) on"
9111
                            " node %s: %s",
9112
                            new_disk.iv_name, new_disk, node, err)
9113
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9114
                       (new_disk.size, new_disk.mode)))
9115
      else:
9116
        # change a given disk
9117
        instance.disks[disk_op].mode = disk_dict['mode']
9118
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9119

    
9120
    if self.op.disk_template:
9121
      r_shut = _ShutdownInstanceDisks(self, instance)
9122
      if not r_shut:
9123
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9124
                                 " proceed with disk template conversion")
9125
      mode = (instance.disk_template, self.op.disk_template)
9126
      try:
9127
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9128
      except:
9129
        self.cfg.ReleaseDRBDMinors(instance.name)
9130
        raise
9131
      result.append(("disk_template", self.op.disk_template))
9132

    
9133
    # NIC changes
9134
    for nic_op, nic_dict in self.op.nics:
9135
      if nic_op == constants.DDM_REMOVE:
9136
        # remove the last nic
9137
        del instance.nics[-1]
9138
        result.append(("nic.%d" % len(instance.nics), "remove"))
9139
      elif nic_op == constants.DDM_ADD:
9140
        # mac and bridge should be set, by now
9141
        mac = nic_dict['mac']
9142
        ip = nic_dict.get('ip', None)
9143
        nicparams = self.nic_pinst[constants.DDM_ADD]
9144
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9145
        instance.nics.append(new_nic)
9146
        result.append(("nic.%d" % (len(instance.nics) - 1),
9147
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9148
                       (new_nic.mac, new_nic.ip,
9149
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9150
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9151
                       )))
9152
      else:
9153
        for key in 'mac', 'ip':
9154
          if key in nic_dict:
9155
            setattr(instance.nics[nic_op], key, nic_dict[key])
9156
        if nic_op in self.nic_pinst:
9157
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9158
        for key, val in nic_dict.iteritems():
9159
          result.append(("nic.%s/%d" % (key, nic_op), val))
9160

    
9161
    # hvparams changes
9162
    if self.op.hvparams:
9163
      instance.hvparams = self.hv_inst
9164
      for key, val in self.op.hvparams.iteritems():
9165
        result.append(("hv/%s" % key, val))
9166

    
9167
    # beparams changes
9168
    if self.op.beparams:
9169
      instance.beparams = self.be_inst
9170
      for key, val in self.op.beparams.iteritems():
9171
        result.append(("be/%s" % key, val))
9172

    
9173
    # OS change
9174
    if self.op.os_name:
9175
      instance.os = self.op.os_name
9176

    
9177
    # osparams changes
9178
    if self.op.osparams:
9179
      instance.osparams = self.os_inst
9180
      for key, val in self.op.osparams.iteritems():
9181
        result.append(("os/%s" % key, val))
9182

    
9183
    self.cfg.Update(instance, feedback_fn)
9184

    
9185
    return result
9186

    
9187
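  # Dispatch table mapping (current disk template, requested disk template) to
  # the conversion method; only plain<->drbd conversions are supported.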
  _DISK_CONVERSIONS = {
9188
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9189
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9190
    }
9191

    
9192

    
9193
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUExportInstance(LogicalUnit):
9289
  """Export an instance to an image in the cluster.
9290

9291
  """
9292
  HPATH = "instance-export"
9293
  HTYPE = constants.HTYPE_INSTANCE
9294
  _OP_PARAMS = [
9295
    _PInstanceName,
9296
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9297
    ("shutdown", True, _TBool),
9298
    _PShutdownTimeout,
9299
    ("remove_instance", False, _TBool),
9300
    ("ignore_remove_failures", False, _TBool),
9301
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9302
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9303
    ("destination_x509_ca", None, _TMaybeString),
9304
    ]
9305
  REQ_BGL = False
9306

    
9307
  def CheckArguments(self):
9308
    """Check the arguments.
9309

9310
    """
9311
    self.x509_key_name = self.op.x509_key_name
9312
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9313

    
9314
    if self.op.remove_instance and not self.op.shutdown:
9315
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9316
                                 " down before")
9317

    
9318
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9319
      if not self.x509_key_name:
9320
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9321
                                   errors.ECODE_INVAL)
9322

    
9323
      if not self.dest_x509_ca_pem:
9324
        raise errors.OpPrereqError("Missing destination X509 CA",
9325
                                   errors.ECODE_INVAL)
9326

    
9327
  def ExpandNames(self):
9328
    self._ExpandAndLockInstance()
9329

    
9330
    # Lock all nodes for local exports
9331
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9332
      # FIXME: lock only instance primary and destination node
9333
      #
9334
      # Sad but true, for now we have to lock all nodes, as we don't know where
9335
      # the previous export might be, and in this LU we search for it and
9336
      # remove it from its current node. In the future we could fix this by:
9337
      #  - making a tasklet to search (share-lock all), then create the
9338
      #    new one, then one to remove, after
9339
      #  - removing the removal operation altogether
9340
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9341

    
9342
  def DeclareLocks(self, level):
9343
    """Last minute lock declaration."""
9344
    # All nodes are locked anyway, so nothing to do here.
9345

    
9346
  def BuildHooksEnv(self):
9347
    """Build hooks env.
9348

9349
    This will run on the master, primary node and target node.
9350

9351
    """
9352
    env = {
9353
      "EXPORT_MODE": self.op.mode,
9354
      "EXPORT_NODE": self.op.target_node,
9355
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9356
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9357
      # TODO: Generic function for boolean env variables
9358
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9359
      }
9360

    
9361
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9362

    
9363
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9364

    
9365
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9366
      nl.append(self.op.target_node)
9367

    
9368
    return env, nl, nl
9369

    
9370
  def CheckPrereq(self):
9371
    """Check prerequisites.
9372

9373
    This checks that the instance and node names are valid.
9374

9375
    """
9376
    instance_name = self.op.instance_name
9377

    
9378
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9379
    assert self.instance is not None, \
9380
          "Cannot retrieve locked instance %s" % self.op.instance_name
9381
    _CheckNodeOnline(self, self.instance.primary_node)
9382

    
9383
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9384
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9385
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9386
      assert self.dst_node is not None
9387

    
9388
      _CheckNodeOnline(self, self.dst_node.name)
9389
      _CheckNodeNotDrained(self, self.dst_node.name)
9390

    
9391
      self._cds = None
9392
      self.dest_disk_info = None
9393
      self.dest_x509_ca = None
9394

    
9395
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9396
      self.dst_node = None
9397

    
9398
      if len(self.op.target_node) != len(self.instance.disks):
9399
        raise errors.OpPrereqError(("Received destination information for %s"
9400
                                    " disks, but instance %s has %s disks") %
9401
                                   (len(self.op.target_node), instance_name,
9402
                                    len(self.instance.disks)),
9403
                                   errors.ECODE_INVAL)
9404

    
9405
      cds = _GetClusterDomainSecret()
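      # The X509 key name and the destination CA are only accepted if their
      # HMAC/signature verifies against this cluster's domain secret.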
9406

    
9407
      # Check X509 key name
9408
      try:
9409
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9410
      except (TypeError, ValueError), err:
9411
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9412

    
9413
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9414
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9415
                                   errors.ECODE_INVAL)
9416

    
9417
      # Load and verify CA
9418
      try:
9419
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9420
      except OpenSSL.crypto.Error, err:
9421
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9422
                                   (err, ), errors.ECODE_INVAL)
9423

    
9424
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9425
      if errcode is not None:
9426
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9427
                                   (msg, ), errors.ECODE_INVAL)
9428

    
9429
      self.dest_x509_ca = cert
9430

    
9431
      # Verify target information
9432
      disk_info = []
9433
      for idx, disk_data in enumerate(self.op.target_node):
9434
        try:
9435
          (host, port, magic) = \
9436
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9437
        except errors.GenericError, err:
9438
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9439
                                     (idx, err), errors.ECODE_INVAL)
9440

    
9441
        disk_info.append((host, port, magic))
9442

    
9443
      assert len(disk_info) == len(self.op.target_node)
9444
      self.dest_disk_info = disk_info
9445

    
9446
    else:
9447
      raise errors.ProgrammerError("Unhandled export mode %r" %
9448
                                   self.op.mode)
9449

    
9450
    # instance disk type verification
9451
    # TODO: Implement export support for file-based disks
9452
    for disk in self.instance.disks:
9453
      if disk.dev_type == constants.LD_FILE:
9454
        raise errors.OpPrereqError("Export not supported for instances with"
9455
                                   " file-based disks", errors.ECODE_INVAL)
9456

    
9457
  def _CleanupExports(self, feedback_fn):
9458
    """Removes exports of current instance from all other nodes.
9459

9460
    If an instance in a cluster with nodes A..D was exported to node C, its
9461
    exports will be removed from the nodes A, B and D.
9462

9463
    """
9464
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9465

    
9466
    nodelist = self.cfg.GetNodeList()
9467
    nodelist.remove(self.dst_node.name)
9468

    
9469
    # on one-node clusters nodelist will be empty after the removal
9470
    # if we proceed the backup would be removed because OpQueryExports
9471
    # substitutes an empty list with the full cluster node list.
9472
    iname = self.instance.name
9473
    if nodelist:
9474
      feedback_fn("Removing old exports for instance %s" % iname)
9475
      exportlist = self.rpc.call_export_list(nodelist)
9476
      for node in exportlist:
9477
        if exportlist[node].fail_msg:
9478
          continue
9479
        if iname in exportlist[node].payload:
9480
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9481
          if msg:
9482
            self.LogWarning("Could not remove older export for instance %s"
9483
                            " on node %s: %s", iname, node, msg)
9484

    
9485
  def Exec(self, feedback_fn):
9486
    """Export an instance to an image in the cluster.
9487

9488
    """
9489
    assert self.op.mode in constants.EXPORT_MODES
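    # Overall flow: optionally shut the instance down, snapshot its disks,
    # restart it if it was running, export the snapshots (locally or to the
    # remote destination), then clean up and optionally remove the instance.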
9490

    
9491
    instance = self.instance
9492
    src_node = instance.primary_node
9493

    
9494
    if self.op.shutdown:
9495
      # shutdown the instance, but not the disks
9496
      feedback_fn("Shutting down instance %s" % instance.name)
9497
      result = self.rpc.call_instance_shutdown(src_node, instance,
9498
                                               self.op.shutdown_timeout)
9499
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9500
      result.Raise("Could not shutdown instance %s on"
9501
                   " node %s" % (instance.name, src_node))
9502

    
9503
    # set the disks ID correctly since call_instance_start needs the
9504
    # correct drbd minor to create the symlinks
9505
    for disk in instance.disks:
9506
      self.cfg.SetDiskID(disk, src_node)
9507

    
9508
    activate_disks = (not instance.admin_up)
9509

    
9510
    if activate_disks:
9511
      # Activate the instance disks if we're exporting a stopped instance
9512
      feedback_fn("Activating disks for %s" % instance.name)
9513
      _StartInstanceDisks(self, instance, None)
9514

    
9515
    try:
9516
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9517
                                                     instance)
9518

    
9519
      helper.CreateSnapshots()
9520
      try:
9521
        if (self.op.shutdown and instance.admin_up and
9522
            not self.op.remove_instance):
9523
          assert not activate_disks
9524
          feedback_fn("Starting instance %s" % instance.name)
9525
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9526
          msg = result.fail_msg
9527
          if msg:
9528
            feedback_fn("Failed to start instance: %s" % msg)
9529
            _ShutdownInstanceDisks(self, instance)
9530
            raise errors.OpExecError("Could not start instance: %s" % msg)
9531

    
9532
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9533
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9534
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9535
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9536
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9537

    
9538
          (key_name, _, _) = self.x509_key_name
9539

    
9540
          dest_ca_pem = \
9541
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9542
                                            self.dest_x509_ca)
9543

    
9544
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9545
                                                     key_name, dest_ca_pem,
9546
                                                     timeouts)
9547
      finally:
9548
        helper.Cleanup()
9549

    
9550
      # Check for backwards compatibility
9551
      assert len(dresults) == len(instance.disks)
9552
      assert compat.all(isinstance(i, bool) for i in dresults), \
9553
             "Not all results are boolean: %r" % dresults
9554

    
9555
    finally:
9556
      if activate_disks:
9557
        feedback_fn("Deactivating disks for %s" % instance.name)
9558
        _ShutdownInstanceDisks(self, instance)
9559

    
9560
    if not (compat.all(dresults) and fin_resu):
9561
      failures = []
9562
      if not fin_resu:
9563
        failures.append("export finalization")
9564
      if not compat.all(dresults):
9565
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9566
                               if not dsk)
9567
        failures.append("disk export: disk(s) %s" % fdsk)
9568

    
9569
      raise errors.OpExecError("Export failed, errors in %s" %
9570
                               utils.CommaJoin(failures))
9571

    
9572
    # At this point, the export was successful, we can cleanup/finish
9573

    
9574
    # Remove instance if requested
9575
    if self.op.remove_instance:
9576
      feedback_fn("Removing instance %s" % instance.name)
9577
      _RemoveInstance(self, feedback_fn, instance,
9578
                      self.op.ignore_remove_failures)
9579

    
9580
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9581
      self._CleanupExports(feedback_fn)
9582

    
9583
    return fin_resu, dresults
9584

    
9585

    
9586
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


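# Illustrative sketch, not part of the original LU code: LURemoveExport.Exec
# above walks the per-node results of the export_list RPC and removes the
# export from every node where it is present.  The hypothetical helper below
# captures just the selection step, using a plain {node: list-of-export-names}
# dict instead of the real RPC result objects.
def _ExampleNodesHoldingExport(export_lists, instance_name):
  """Return the sorted list of nodes whose export list contains the name.

  """
  return sorted(node for (node, exports) in export_lists.items()
                if instance_name in exports)

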
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


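# Illustrative sketch, not part of the original LU code: TagsLU.ExpandNames
# above dispatches on the tag kind -- cluster tags need no extra lock, node
# tags lock the node and instance tags lock the instance.  The hypothetical
# helper below spells out that mapping for an already expanded name.
def _ExampleTagLocksForKind(kind, name):
  """Return the needed_locks dict a tags LU would request for this kind.

  """
  if kind == constants.TAG_NODE:
    return {locking.LEVEL_NODE: name}
  elif kind == constants.TAG_INSTANCE:
    return {locking.LEVEL_INSTANCE: name}
  # cluster tags need no additional lock in the LU above
  return {}

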
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


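# Illustrative sketch, not part of the original LU code: LUSearchTags.Exec
# above walks every (path, taggable object) pair and keeps the tags matching
# the compiled pattern.  The hypothetical helper below shows the same loop
# over a plain {path: list-of-tags} dict.
def _ExampleSearchTags(pattern, tags_by_path):
  """Return the (path, tag) pairs whose tag matches the given regex.

  """
  regex = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in sorted(tags_by_path.items())
          for tag in tags
          if regex.search(tag)]

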
class LUAddTags(TagsLU):
  """Sets one or more tags on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that all the given tags are present.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


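# Illustrative sketch, not part of the original LU code: LUDelTags.CheckPrereq
# above refuses to run if any requested tag is missing from the target object.
# The hypothetical helper below reproduces that set computation on plain
# Python containers.
def _ExampleMissingTags(requested_tags, current_tags):
  """Return the sorted list of requested tags that are not currently set.

  """
  return sorted(frozenset(requested_tags) - frozenset(current_tags))

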
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


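# Illustrative sketch, not part of the original LU code: LUTestDelay.Exec
# above sleeps once when repeat is zero and repeat times otherwise.  The
# hypothetical helper below just makes that edge case explicit.
def _ExampleDelayIterations(repeat):
  """Return how many times _TestDelay would run for a given repeat value.

  """
  if repeat == 0:
    return 1
  return repeat

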
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


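# Illustrative sketch, not part of the original module: IAllocator serialises
# a request dict into JSON for the external allocator script and expects a
# JSON answer with "success", "info" and "result" keys (see _ValidateResult
# above).  The sample payloads below show a plausible shape for the allocate
# mode; the instance name, node names and sizes are made up.
_EXAMPLE_IALLOCATOR_REQUEST = {
  "type": constants.IALLOCATOR_MODE_ALLOC,
  "name": "instance1.example.com",
  "disk_template": constants.DT_DRBD8,
  "tags": [],
  "os": "debootstrap+default",
  "vcpus": 1,
  "memory": 512,
  "disks": [{"size": 1024, "mode": "w"}],
  "disk_space_total": 1280,
  "nics": [{"mac": "auto", "ip": None, "bridge": None}],
  "required_nodes": 2,
  }

_EXAMPLE_IALLOCATOR_RESPONSE = {
  "success": True,
  "info": "allocation successful",
  # for the allocate mode the result is the list of chosen node names
  "result": ["node1.example.com", "node2.example.com"],
  }

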
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

10202
    """Check prerequisites.
10203

10204
    This checks the opcode parameters depending on the director and mode test.
10205

10206
    """
10207
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10208
      for attr in ["mem_size", "disks", "disk_template",
10209
                   "os", "tags", "nics", "vcpus"]:
10210
        if not hasattr(self.op, attr):
10211
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10212
                                     attr, errors.ECODE_INVAL)
10213
      iname = self.cfg.ExpandInstanceName(self.op.name)
10214
      if iname is not None:
10215
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10216
                                   iname, errors.ECODE_EXISTS)
10217
      if not isinstance(self.op.nics, list):
10218
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10219
                                   errors.ECODE_INVAL)
10220
      if not isinstance(self.op.disks, list):
10221
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10222
                                   errors.ECODE_INVAL)
10223
      for row in self.op.disks:
10224
        if (not isinstance(row, dict) or
10225
            "size" not in row or
10226
            not isinstance(row["size"], int) or
10227
            "mode" not in row or
10228
            row["mode"] not in ['r', 'w']):
10229
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10230
                                     " parameter", errors.ECODE_INVAL)
10231
      if self.op.hypervisor is None:
10232
        self.op.hypervisor = self.cfg.GetHypervisorType()
10233
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10234
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10235
      self.op.name = fname
10236
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10237
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10238
      if not hasattr(self.op, "evac_nodes"):
10239
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10240
                                   " opcode input", errors.ECODE_INVAL)
10241
    else:
10242
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10243
                                 self.op.mode, errors.ECODE_INVAL)
10244

    
10245
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10246
      if self.op.allocator is None:
10247
        raise errors.OpPrereqError("Missing allocator name",
10248
                                   errors.ECODE_INVAL)
10249
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10250
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10251
                                 self.op.direction, errors.ECODE_INVAL)
10252

    
10253
  def Exec(self, feedback_fn):
10254
    """Run the allocator test.
10255

10256
    """
10257
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10258
      ial = IAllocator(self.cfg, self.rpc,
10259
                       mode=self.op.mode,
10260
                       name=self.op.name,
10261
                       mem_size=self.op.mem_size,
10262
                       disks=self.op.disks,
10263
                       disk_template=self.op.disk_template,
10264
                       os=self.op.os,
10265
                       tags=self.op.tags,
10266
                       nics=self.op.nics,
10267
                       vcpus=self.op.vcpus,
10268
                       hypervisor=self.op.hypervisor,
10269
                       )
10270
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10271
      ial = IAllocator(self.cfg, self.rpc,
10272
                       mode=self.op.mode,
10273
                       name=self.op.name,
10274
                       relocate_from=list(self.relocate_from),
10275
                       )
10276
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10277
      ial = IAllocator(self.cfg, self.rpc,
10278
                       mode=self.op.mode,
10279
                       evac_nodes=self.op.evac_nodes)
10280
    else:
10281
      raise errors.ProgrammerError("Uncatched mode %s in"
10282
                                   " LUTestAllocator.Exec", self.op.mode)
10283

    
10284
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
10285
      result = ial.in_text
10286
    else:
10287
      ial.Run(self.op.allocator, validate=False)
10288
      result = ial.out_text
10289
    return result
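

# Illustrative sketch, not part of the original LU code: LUTestAllocator.Exec
# above returns raw JSON text -- the allocator input for direction
# IALLOCATOR_DIR_IN, or the allocator output for IALLOCATOR_DIR_OUT.  The
# hypothetical helper below shows how a caller could pull the status out of
# the output text, reusing the module's serializer.
def _ExampleCheckAllocatorOutput(out_text):
  """Return the (success, info) pair parsed from an allocator output text.

  """
  rdict = serializer.Load(out_text)
  return (rdict.get("success", False), rdict.get("info", ""))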