root / lib / cmdlib.py @ 3636400f


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39

    
40
from ganeti import ssh
41
from ganeti import utils
42
from ganeti import errors
43
from ganeti import hypervisor
44
from ganeti import locking
45
from ganeti import constants
46
from ganeti import objects
47
from ganeti import serializer
48
from ganeti import ssconf
49
from ganeti import uidpool
50
from ganeti import compat
51
from ganeti import masterd
52

    
53
import ganeti.masterd.instance # pylint: disable-msg=W0611
54

    
55

    
56
# Modifiable default values; need to define these here before the
57
# actual LUs
58

    
59
def _EmptyList():
60
  """Returns an empty list.
61

62
  """
63
  return []
64

    
65

    
66
def _EmptyDict():
67
  """Returns an empty dict.
68

69
  """
70
  return {}
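
# Example (sketch; the attribute names below are hypothetical): an LU lists
# callables like the ones above in _OP_DEFS so that every opcode gets a fresh
# object instead of a shared mutable default, e.g.:
#
#   _OP_DEFS = [
#     ("ignore_secondaries", False),
#     ("nics", _EmptyList),
#     ("beparams", _EmptyDict),
#     ]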
71

    
72

    
73
# Some basic types
74
def _TNotNone(val):
75
  """Checks if the given value is not None.
76

77
  """
78
  return val is not None
79

    
80

    
81
def _TNone(val):
82
  """Checks if the given value is None.
83

84
  """
85
  return val is None
86

    
87

    
88
def _TBool(val):
89
  """Checks if the given value is a boolean.
90

91
  """
92
  return isinstance(val, bool)
93

    
94

    
95
def _TInt(val):
96
  """Checks if the given value is an integer.
97

98
  """
99
  return isinstance(val, int)
100

    
101

    
102
def _TFloat(val):
103
  """Checks if the given value is a float.
104

105
  """
106
  return isinstance(val, float)
107

    
108

    
109
def _TString(val):
110
  """Checks if the given value is a string.
111

112
  """
113
  return isinstance(val, basestring)
114

    
115

    
116
def _TTrue(val):
117
  """Checks if a given value evaluates to a boolean True value.
118

119
  """
120
  return bool(val)
121

    
122

    
123
def _TElemOf(target_list):
124
  """Builds a function that checks if a given value is a member of a list.
125

126
  """
127
  return lambda val: val in target_list
128

    
129

    
130
# Container types
131
def _TList(val):
132
  """Checks if the given value is a list.
133

134
  """
135
  return isinstance(val, list)
136

    
137

    
138
def _TDict(val):
139
  """Checks if the given value is a dictionary.
140

141
  """
142
  return isinstance(val, dict)
143

    
144

    
145
# Combinator types
146
def _TAnd(*args):
147
  """Combine multiple functions using an AND operation.
148

149
  """
150
  def fn(val):
151
    return compat.all(t(val) for t in args)
152
  return fn
153

    
154

    
155
def _TOr(*args):
156
  """Combine multiple functions using an AND operation.
157

158
  """
159
  def fn(val):
160
    return compat.any(t(val) for t in args)
161
  return fn
162

    
163

    
164
# Type aliases
165

    
166
# non-empty string
167
_TNEString = _TAnd(_TString, _TTrue)
168

    
169

    
170
# positive integer
171
_TPInt = _TAnd(_TInt, lambda v: v >= 0)
172

    
173

    
174
def _TListOf(my_type):
175
  """Checks if a given value is a list with all elements of the same type.
176

177
  """
178
  return _TAnd(_TList,
179
               lambda lst: compat.all(lst, my_type))
180

    
181

    
182
def _TDictOf(key_type, val_type):
183
  """Checks a dict type for the type of its key/values.
184

185
  """
186
  return _TAnd(_TDict,
187
               lambda my_dict: (compat.all(my_dict.keys(), key_type) and
188
                                compat.all(my_dict.values(), val_type)))
189

    
190

    
191
# End types
192
class LogicalUnit(object):
193
  """Logical Unit base class.
194

195
  Subclasses must follow these rules:
196
    - implement ExpandNames
197
    - implement CheckPrereq (except when tasklets are used)
198
    - implement Exec (except when tasklets are used)
199
    - implement BuildHooksEnv
200
    - redefine HPATH and HTYPE
201
    - optionally redefine their run requirements:
202
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
203

204
  Note that all commands require root permissions.
205

206
  @ivar dry_run_result: the value (if any) that will be returned to the caller
207
      in dry-run mode (signalled by opcode dry_run parameter)
208
  @cvar _OP_DEFS: a list of opcode attributes and the default values
209
      they should get if not already existing
210

211
  """
212
  HPATH = None
213
  HTYPE = None
214
  _OP_REQP = []
215
  _OP_DEFS = []
216
  REQ_BGL = True
217

    
218
  def __init__(self, processor, op, context, rpc):
219
    """Constructor for LogicalUnit.
220

221
    This needs to be overridden in derived classes in order to check op
222
    validity.
223

224
    """
225
    self.proc = processor
226
    self.op = op
227
    self.cfg = context.cfg
228
    self.context = context
229
    self.rpc = rpc
230
    # Dicts used to declare locking needs to mcpu
231
    self.needed_locks = None
232
    self.acquired_locks = {}
233
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
234
    self.add_locks = {}
235
    self.remove_locks = {}
236
    # Used to force good behavior when calling helper functions
237
    self.recalculate_locks = {}
238
    self.__ssh = None
239
    # logging
240
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
241
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
242
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
243
    # support for dry-run
244
    self.dry_run_result = None
245
    # support for generic debug attribute
246
    if (not hasattr(self.op, "debug_level") or
247
        not isinstance(self.op.debug_level, int)):
248
      self.op.debug_level = 0
249

    
250
    # Tasklets
251
    self.tasklets = None
252

    
253
    for aname, aval in self._OP_DEFS:
254
      if not hasattr(self.op, aname):
255
        if callable(aval):
256
          dval = aval()
257
        else:
258
          dval = aval
259
        setattr(self.op, aname, dval)
260

    
261
    for attr_name, test in self._OP_REQP:
262
      if not hasattr(op, attr_name):
263
        raise errors.OpPrereqError("Required parameter '%s' missing" %
264
                                   attr_name, errors.ECODE_INVAL)
265
      attr_val = getattr(op, attr_name, None)
266
      if not callable(test):
267
        raise errors.ProgrammerError("Validation for parameter '%s' failed,"
268
                                     " given type is not a proper type (%s)" %
269
                                     (attr_name, test))
270
      if not test(attr_val):
271
        raise errors.OpPrereqError("Parameter '%s' has invalid type" %
272
                                   attr_name, errors.ECODE_INVAL)
273

    
274
    self.CheckArguments()
275

    
276
  def __GetSSH(self):
277
    """Returns the SshRunner object
278

279
    """
280
    if not self.__ssh:
281
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
282
    return self.__ssh
283

    
284
  ssh = property(fget=__GetSSH)
285

    
286
  def CheckArguments(self):
287
    """Check syntactic validity for the opcode arguments.
288

289
    This method is for doing a simple syntactic check and ensure
290
    validity of opcode parameters, without any cluster-related
291
    checks. While the same can be accomplished in ExpandNames and/or
292
    CheckPrereq, doing these separately is better because:
293

294
      - ExpandNames is left as purely a lock-related function
295
      - CheckPrereq is run after we have acquired locks (and possible
296
        waited for them)
297

298
    The function is allowed to change the self.op attribute so that
299
    later methods no longer need to worry about missing parameters.
300

301
    """
302
    pass
303

    
304
  def ExpandNames(self):
305
    """Expand names for this LU.
306

307
    This method is called before starting to execute the opcode, and it should
308
    update all the parameters of the opcode to their canonical form (e.g. a
309
    short node name must be fully expanded after this method has successfully
310
    completed). This way locking, hooks, logging, etc. can work correctly.
311

312
    LUs which implement this method must also populate the self.needed_locks
313
    member, as a dict with lock levels as keys, and a list of needed lock names
314
    as values. Rules:
315

316
      - use an empty dict if you don't need any lock
317
      - if you don't need any lock at a particular level omit that level
318
      - don't put anything for the BGL level
319
      - if you want all locks at a level use locking.ALL_SET as a value
320

321
    If you need to share locks (rather than acquire them exclusively) at one
322
    level you can modify self.share_locks, setting a true value (usually 1) for
323
    that level. By default locks are not shared.
324

325
    This function can also define a list of tasklets, which then will be
326
    executed in order instead of the usual LU-level CheckPrereq and Exec
327
    functions, if those are not defined by the LU.
328

329
    Examples::
330

331
      # Acquire all nodes and one instance
332
      self.needed_locks = {
333
        locking.LEVEL_NODE: locking.ALL_SET,
334
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
335
      }
336
      # Acquire just two nodes
337
      self.needed_locks = {
338
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
339
      }
340
      # Acquire no locks
341
      self.needed_locks = {} # No, you can't leave it to the default value None
342

343
    """
344
    # The implementation of this method is mandatory only if the new LU is
345
    # concurrent, so that old LUs don't need to be changed all at the same
346
    # time.
347
    if self.REQ_BGL:
348
      self.needed_locks = {} # Exclusive LUs don't need locks.
349
    else:
350
      raise NotImplementedError
351

    
352
  def DeclareLocks(self, level):
353
    """Declare LU locking needs for a level
354

355
    While most LUs can just declare their locking needs at ExpandNames time,
356
    sometimes there's the need to calculate some locks after having acquired
357
    the ones before. This function is called just before acquiring locks at a
358
    particular level, but after acquiring the ones at lower levels, and permits
359
    such calculations. It can be used to modify self.needed_locks, and by
360
    default it does nothing.
361

362
    This function is only called if you have something already set in
363
    self.needed_locks for the level.
364

365
    @param level: Locking level which is going to be locked
366
    @type level: member of ganeti.locking.LEVELS
367

368
    """
369

    
370
  def CheckPrereq(self):
371
    """Check prerequisites for this LU.
372

373
    This method should check that the prerequisites for the execution
374
    of this LU are fulfilled. It can do internode communication, but
375
    it should be idempotent - no cluster or system changes are
376
    allowed.
377

378
    The method should raise errors.OpPrereqError in case something is
379
    not fulfilled. Its return value is ignored.
380

381
    This method should also update all the parameters of the opcode to
382
    their canonical form if it hasn't been done by ExpandNames before.
383

384
    """
385
    if self.tasklets is not None:
386
      for (idx, tl) in enumerate(self.tasklets):
387
        logging.debug("Checking prerequisites for tasklet %s/%s",
388
                      idx + 1, len(self.tasklets))
389
        tl.CheckPrereq()
390
    else:
391
      pass
392

    
393
  def Exec(self, feedback_fn):
394
    """Execute the LU.
395

396
    This method should implement the actual work. It should raise
397
    errors.OpExecError for failures that are somewhat dealt with in
398
    code, or expected.
399

400
    """
401
    if self.tasklets is not None:
402
      for (idx, tl) in enumerate(self.tasklets):
403
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
404
        tl.Exec(feedback_fn)
405
    else:
406
      raise NotImplementedError
407

    
408
  def BuildHooksEnv(self):
409
    """Build hooks environment for this LU.
410

411
    This method should return a three-element tuple consisting of: a dict
412
    containing the environment that will be used for running the
413
    specific hook for this LU, a list of node names on which the hook
414
    should run before the execution, and a list of node names on which
415
    the hook should run after the execution.
416

417
    The keys of the dict must not be prefixed with 'GANETI_', as this will
418
    be handled in the hooks runner. Also note additional keys will be
419
    added by the hooks runner. If the LU doesn't define any
420
    environment, an empty dict (and not None) should be returned.
421

422
    If no nodes are to be returned, an empty list (and not None) should be used.
423

424
    Note that if the HPATH for a LU class is None, this function will
425
    not be called.
426

427
    """
428
    raise NotImplementedError
429

    
430
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
431
    """Notify the LU about the results of its hooks.
432

433
    This method is called every time a hooks phase is executed, and notifies
434
    the Logical Unit about the hooks' result. The LU can then use it to alter
435
    its result based on the hooks.  By default the method does nothing and the
436
    previous result is passed back unchanged, but any LU can override it if it
437
    wants to use the local cluster hook-scripts somehow.
438

439
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
440
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
441
    @param hook_results: the results of the multi-node hooks rpc call
442
    @param feedback_fn: function used to send feedback back to the caller
443
    @param lu_result: the previous Exec result this LU had, or None
444
        in the PRE phase
445
    @return: the new Exec result, based on the previous result
446
        and hook results
447

448
    """
449
    # API must be kept, thus we ignore the "unused argument" and "could
450
    # be a function" warnings
451
    # pylint: disable-msg=W0613,R0201
452
    return lu_result
453

    
454
  def _ExpandAndLockInstance(self):
455
    """Helper function to expand and lock an instance.
456

457
    Many LUs that work on an instance take its name in self.op.instance_name
458
    and need to expand it and then declare the expanded name for locking. This
459
    function does it, and then updates self.op.instance_name to the expanded
460
    name. It also initializes needed_locks as a dict, if this hasn't been done
461
    before.
462

463
    """
464
    if self.needed_locks is None:
465
      self.needed_locks = {}
466
    else:
467
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
468
        "_ExpandAndLockInstance called with instance-level locks set"
469
    self.op.instance_name = _ExpandInstanceName(self.cfg,
470
                                                self.op.instance_name)
471
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
472

    
473
  def _LockInstancesNodes(self, primary_only=False):
474
    """Helper function to declare instances' nodes for locking.
475

476
    This function should be called after locking one or more instances to lock
477
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
478
    with all primary or secondary nodes for instances already locked and
479
    present in self.needed_locks[locking.LEVEL_INSTANCE].
480

481
    It should be called from DeclareLocks, and for safety only works if
482
    self.recalculate_locks[locking.LEVEL_NODE] is set.
483

484
    In the future it may grow parameters to just lock some instance's nodes, or
485
    to just lock primary or secondary nodes, if needed.
486

487
    It should be called in DeclareLocks in a way similar to::
488

489
      if level == locking.LEVEL_NODE:
490
        self._LockInstancesNodes()
491

492
    @type primary_only: boolean
493
    @param primary_only: only lock primary nodes of locked instances
494

495
    """
496
    assert locking.LEVEL_NODE in self.recalculate_locks, \
497
      "_LockInstancesNodes helper function called with no nodes to recalculate"
498

    
499
    # TODO: check whether we've really been called with the instance locks held
500

    
501
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
502
    # future we might want to have different behaviors depending on the value
503
    # of self.recalculate_locks[locking.LEVEL_NODE]
504
    wanted_nodes = []
505
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
506
      instance = self.context.cfg.GetInstanceInfo(instance_name)
507
      wanted_nodes.append(instance.primary_node)
508
      if not primary_only:
509
        wanted_nodes.extend(instance.secondary_nodes)
510

    
511
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
512
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
513
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
514
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
515

    
516
    del self.recalculate_locks[locking.LEVEL_NODE]
517

    
518

    
519
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
520
  """Simple LU which runs no hooks.
521

522
  This LU is intended as a parent for other LogicalUnits which will
523
  run no hooks, in order to reduce duplicate code.
524

525
  """
526
  HPATH = None
527
  HTYPE = None
528

    
529
  def BuildHooksEnv(self):
530
    """Empty BuildHooksEnv for NoHooksLu.
531

532
    This just raises an error.
533

534
    """
535
    assert False, "BuildHooksEnv called for NoHooksLUs"
536

    
537

    
538
class Tasklet:
539
  """Tasklet base class.
540

541
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
542
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
543
  tasklets know nothing about locks.
544

545
  Subclasses must follow these rules:
546
    - Implement CheckPrereq
547
    - Implement Exec
548

549
  """
550
  def __init__(self, lu):
551
    self.lu = lu
552

    
553
    # Shortcuts
554
    self.cfg = lu.cfg
555
    self.rpc = lu.rpc
556

    
557
  def CheckPrereq(self):
558
    """Check prerequisites for this tasklets.
559

560
    This method should check whether the prerequisites for the execution of
561
    this tasklet are fulfilled. It can do internode communication, but it
562
    should be idempotent - no cluster or system changes are allowed.
563

564
    The method should raise errors.OpPrereqError in case something is not
565
    fulfilled. Its return value is ignored.
566

567
    This method should also update all parameters to their canonical form if it
568
    hasn't been done before.
569

570
    """
571
    pass
572

    
573
  def Exec(self, feedback_fn):
574
    """Execute the tasklet.
575

576
    This method should implement the actual work. It should raise
577
    errors.OpExecError for failures that are somewhat dealt with in code, or
578
    expected.
579

580
    """
581
    raise NotImplementedError
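
# Example (sketch; SomeTasklet and self.op.items are hypothetical names): an
# LU built on tasklets simply assigns them in ExpandNames, and the default
# LogicalUnit.CheckPrereq/Exec above then iterate over them in order:
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [SomeTasklet(self, item) for item in self.op.items]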
582

    
583

    
584
def _GetWantedNodes(lu, nodes):
585
  """Returns list of checked and expanded node names.
586

587
  @type lu: L{LogicalUnit}
588
  @param lu: the logical unit on whose behalf we execute
589
  @type nodes: list
590
  @param nodes: list of node names or None for all nodes
591
  @rtype: list
592
  @return: the list of nodes, sorted
593
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
594

595
  """
596
  if not nodes:
597
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
598
      " non-empty list of nodes whose name is to be expanded.")
599

    
600
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
601
  return utils.NiceSort(wanted)
602

    
603

    
604
def _GetWantedInstances(lu, instances):
605
  """Returns list of checked and expanded instance names.
606

607
  @type lu: L{LogicalUnit}
608
  @param lu: the logical unit on whose behalf we execute
609
  @type instances: list
610
  @param instances: list of instance names or None for all instances
611
  @rtype: list
612
  @return: the list of instances, sorted
613
  @raise errors.OpPrereqError: if the instances parameter is wrong type
614
  @raise errors.OpPrereqError: if any of the passed instances is not found
615

616
  """
617
  if instances:
618
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
619
  else:
620
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
621
  return wanted
622

    
623

    
624
def _GetUpdatedParams(old_params, update_dict,
625
                      use_default=True, use_none=False):
626
  """Return the new version of a parameter dictionary.
627

628
  @type old_params: dict
629
  @param old_params: old parameters
630
  @type update_dict: dict
631
  @param update_dict: dict containing new parameter values, or
632
      constants.VALUE_DEFAULT to reset the parameter to its default
633
      value
634
  @type use_default: boolean
635
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
636
      values as 'to be deleted' values
637
  @type use_none: boolean
638
  @param use_none: whether to recognise C{None} values as 'to be
639
      deleted' values
640
  @rtype: dict
641
  @return: the new parameter dictionary
642

643
  """
644
  params_copy = copy.deepcopy(old_params)
645
  for key, val in update_dict.iteritems():
646
    if ((use_default and val == constants.VALUE_DEFAULT) or
647
        (use_none and val is None)):
648
      try:
649
        del params_copy[key]
650
      except KeyError:
651
        pass
652
    else:
653
      params_copy[key] = val
654
  return params_copy
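
# Example (illustrative parameter names and values): VALUE_DEFAULT removes a
# key so the cluster default applies again, while other keys are overwritten:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda1"}
#   _GetUpdatedParams(old, upd) == {"root_path": "/dev/vda1"}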
655

    
656

    
657
def _CheckOutputFields(static, dynamic, selected):
658
  """Checks whether all selected fields are valid.
659

660
  @type static: L{utils.FieldSet}
661
  @param static: static fields set
662
  @type dynamic: L{utils.FieldSet}
663
  @param dynamic: dynamic fields set
664

665
  """
666
  f = utils.FieldSet()
667
  f.Extend(static)
668
  f.Extend(dynamic)
669

    
670
  delta = f.NonMatching(selected)
671
  if delta:
672
    raise errors.OpPrereqError("Unknown output fields selected: %s"
673
                               % ",".join(delta), errors.ECODE_INVAL)
674

    
675

    
676
def _CheckBooleanOpField(op, name):
677
  """Validates boolean opcode parameters.
678

679
  This will ensure that an opcode parameter is either a boolean value,
680
  or None (but that it always exists).
681

682
  """
683
  val = getattr(op, name, None)
684
  if not (val is None or isinstance(val, bool)):
685
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
686
                               (name, str(val)), errors.ECODE_INVAL)
687
  setattr(op, name, val)
688

    
689

    
690
def _CheckGlobalHvParams(params):
691
  """Validates that given hypervisor params are not global ones.
692

693
  This will ensure that instances don't get customised versions of
694
  global params.
695

696
  """
697
  used_globals = constants.HVC_GLOBALS.intersection(params)
698
  if used_globals:
699
    msg = ("The following hypervisor parameters are global and cannot"
700
           " be customized at instance level, please modify them at"
701
           " cluster level: %s" % utils.CommaJoin(used_globals))
702
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
703

    
704

    
705
def _CheckNodeOnline(lu, node):
706
  """Ensure that a given node is online.
707

708
  @param lu: the LU on behalf of which we make the check
709
  @param node: the node to check
710
  @raise errors.OpPrereqError: if the node is offline
711

712
  """
713
  if lu.cfg.GetNodeInfo(node).offline:
714
    raise errors.OpPrereqError("Can't use offline node %s" % node,
715
                               errors.ECODE_INVAL)
716

    
717

    
718
def _CheckNodeNotDrained(lu, node):
719
  """Ensure that a given node is not drained.
720

721
  @param lu: the LU on behalf of which we make the check
722
  @param node: the node to check
723
  @raise errors.OpPrereqError: if the node is drained
724

725
  """
726
  if lu.cfg.GetNodeInfo(node).drained:
727
    raise errors.OpPrereqError("Can't use drained node %s" % node,
728
                               errors.ECODE_INVAL)
729

    
730

    
731
def _CheckNodeHasOS(lu, node, os_name, force_variant):
732
  """Ensure that a node supports a given OS.
733

734
  @param lu: the LU on behalf of which we make the check
735
  @param node: the node to check
736
  @param os_name: the OS to query about
737
  @param force_variant: whether to ignore variant errors
738
  @raise errors.OpPrereqError: if the node does not support the OS
739

740
  """
741
  result = lu.rpc.call_os_get(node, os_name)
742
  result.Raise("OS '%s' not in supported OS list for node %s" %
743
               (os_name, node),
744
               prereq=True, ecode=errors.ECODE_INVAL)
745
  if not force_variant:
746
    _CheckOSVariant(result.payload, os_name)
747

    
748

    
749
def _RequireFileStorage():
750
  """Checks that file storage is enabled.
751

752
  @raise errors.OpPrereqError: when file storage is disabled
753

754
  """
755
  if not constants.ENABLE_FILE_STORAGE:
756
    raise errors.OpPrereqError("File storage disabled at configure time",
757
                               errors.ECODE_INVAL)
758

    
759

    
760
def _CheckDiskTemplate(template):
761
  """Ensure a given disk template is valid.
762

763
  """
764
  if template not in constants.DISK_TEMPLATES:
765
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
766
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
767
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
768
  if template == constants.DT_FILE:
769
    _RequireFileStorage()
770

    
771

    
772
def _CheckStorageType(storage_type):
773
  """Ensure a given storage type is valid.
774

775
  """
776
  if storage_type not in constants.VALID_STORAGE_TYPES:
777
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
778
                               errors.ECODE_INVAL)
779
  if storage_type == constants.ST_FILE:
780
    _RequireFileStorage()
781
  return True
782

    
783

    
784
def _GetClusterDomainSecret():
785
  """Reads the cluster domain secret.
786

787
  """
788
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
789
                               strict=True)
790

    
791

    
792
def _CheckInstanceDown(lu, instance, reason):
793
  """Ensure that an instance is not running."""
794
  if instance.admin_up:
795
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
796
                               (instance.name, reason), errors.ECODE_STATE)
797

    
798
  pnode = instance.primary_node
799
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
800
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
801
              prereq=True, ecode=errors.ECODE_ENVIRON)
802

    
803
  if instance.name in ins_l.payload:
804
    raise errors.OpPrereqError("Instance %s is running, %s" %
805
                               (instance.name, reason), errors.ECODE_STATE)
806

    
807

    
808
def _ExpandItemName(fn, name, kind):
809
  """Expand an item name.
810

811
  @param fn: the function to use for expansion
812
  @param name: requested item name
813
  @param kind: text description ('Node' or 'Instance')
814
  @return: the resolved (full) name
815
  @raise errors.OpPrereqError: if the item is not found
816

817
  """
818
  full_name = fn(name)
819
  if full_name is None:
820
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
821
                               errors.ECODE_NOENT)
822
  return full_name
823

    
824

    
825
def _ExpandNodeName(cfg, name):
826
  """Wrapper over L{_ExpandItemName} for nodes."""
827
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
828

    
829

    
830
def _ExpandInstanceName(cfg, name):
831
  """Wrapper over L{_ExpandItemName} for instance."""
832
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
833

    
834

    
835
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
836
                          memory, vcpus, nics, disk_template, disks,
837
                          bep, hvp, hypervisor_name):
838
  """Builds instance related env variables for hooks
839

840
  This builds the hook environment from individual variables.
841

842
  @type name: string
843
  @param name: the name of the instance
844
  @type primary_node: string
845
  @param primary_node: the name of the instance's primary node
846
  @type secondary_nodes: list
847
  @param secondary_nodes: list of secondary nodes as strings
848
  @type os_type: string
849
  @param os_type: the name of the instance's OS
850
  @type status: boolean
851
  @param status: the should_run status of the instance
852
  @type memory: string
853
  @param memory: the memory size of the instance
854
  @type vcpus: string
855
  @param vcpus: the count of VCPUs the instance has
856
  @type nics: list
857
  @param nics: list of tuples (ip, mac, mode, link) representing
858
      the NICs the instance has
859
  @type disk_template: string
860
  @param disk_template: the disk template of the instance
861
  @type disks: list
862
  @param disks: the list of (size, mode) pairs
863
  @type bep: dict
864
  @param bep: the backend parameters for the instance
865
  @type hvp: dict
866
  @param hvp: the hypervisor parameters for the instance
867
  @type hypervisor_name: string
868
  @param hypervisor_name: the hypervisor for the instance
869
  @rtype: dict
870
  @return: the hook environment for this instance
871

872
  """
873
  if status:
874
    str_status = "up"
875
  else:
876
    str_status = "down"
877
  env = {
878
    "OP_TARGET": name,
879
    "INSTANCE_NAME": name,
880
    "INSTANCE_PRIMARY": primary_node,
881
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
882
    "INSTANCE_OS_TYPE": os_type,
883
    "INSTANCE_STATUS": str_status,
884
    "INSTANCE_MEMORY": memory,
885
    "INSTANCE_VCPUS": vcpus,
886
    "INSTANCE_DISK_TEMPLATE": disk_template,
887
    "INSTANCE_HYPERVISOR": hypervisor_name,
888
  }
889

    
890
  if nics:
891
    nic_count = len(nics)
892
    for idx, (ip, mac, mode, link) in enumerate(nics):
893
      if ip is None:
894
        ip = ""
895
      env["INSTANCE_NIC%d_IP" % idx] = ip
896
      env["INSTANCE_NIC%d_MAC" % idx] = mac
897
      env["INSTANCE_NIC%d_MODE" % idx] = mode
898
      env["INSTANCE_NIC%d_LINK" % idx] = link
899
      if mode == constants.NIC_MODE_BRIDGED:
900
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
901
  else:
902
    nic_count = 0
903

    
904
  env["INSTANCE_NIC_COUNT"] = nic_count
905

    
906
  if disks:
907
    disk_count = len(disks)
908
    for idx, (size, mode) in enumerate(disks):
909
      env["INSTANCE_DISK%d_SIZE" % idx] = size
910
      env["INSTANCE_DISK%d_MODE" % idx] = mode
911
  else:
912
    disk_count = 0
913

    
914
  env["INSTANCE_DISK_COUNT"] = disk_count
915

    
916
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
917
    for key, value in source.items():
918
      env["INSTANCE_%s_%s" % (kind, key)] = value
919

    
920
  return env
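
# Example (illustrative values): for an instance with one bridged NIC and one
# disk, the environment built above would contain entries such as
#
#   "INSTANCE_NIC_COUNT": 1, "INSTANCE_NIC0_MODE": "bridged",
#   "INSTANCE_NIC0_BRIDGE": "xen-br0", "INSTANCE_DISK_COUNT": 1,
#   "INSTANCE_DISK0_SIZE": 10240,
#
# and the hooks runner later adds the GANETI_ prefix to each key.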
921

    
922

    
923
def _NICListToTuple(lu, nics):
924
  """Build a list of nic information tuples.
925

926
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
927
  value in LUQueryInstanceData.
928

929
  @type lu:  L{LogicalUnit}
930
  @param lu: the logical unit on whose behalf we execute
931
  @type nics: list of L{objects.NIC}
932
  @param nics: list of nics to convert to hooks tuples
933

934
  """
935
  hooks_nics = []
936
  cluster = lu.cfg.GetClusterInfo()
937
  for nic in nics:
938
    ip = nic.ip
939
    mac = nic.mac
940
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
941
    mode = filled_params[constants.NIC_MODE]
942
    link = filled_params[constants.NIC_LINK]
943
    hooks_nics.append((ip, mac, mode, link))
944
  return hooks_nics
945

    
946

    
947
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
948
  """Builds instance related env variables for hooks from an object.
949

950
  @type lu: L{LogicalUnit}
951
  @param lu: the logical unit on whose behalf we execute
952
  @type instance: L{objects.Instance}
953
  @param instance: the instance for which we should build the
954
      environment
955
  @type override: dict
956
  @param override: dictionary with key/values that will override
957
      our values
958
  @rtype: dict
959
  @return: the hook environment dictionary
960

961
  """
962
  cluster = lu.cfg.GetClusterInfo()
963
  bep = cluster.FillBE(instance)
964
  hvp = cluster.FillHV(instance)
965
  args = {
966
    'name': instance.name,
967
    'primary_node': instance.primary_node,
968
    'secondary_nodes': instance.secondary_nodes,
969
    'os_type': instance.os,
970
    'status': instance.admin_up,
971
    'memory': bep[constants.BE_MEMORY],
972
    'vcpus': bep[constants.BE_VCPUS],
973
    'nics': _NICListToTuple(lu, instance.nics),
974
    'disk_template': instance.disk_template,
975
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
976
    'bep': bep,
977
    'hvp': hvp,
978
    'hypervisor_name': instance.hypervisor,
979
  }
980
  if override:
981
    args.update(override)
982
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
983

    
984

    
985
def _AdjustCandidatePool(lu, exceptions):
986
  """Adjust the candidate pool after node operations.
987

988
  """
989
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
990
  if mod_list:
991
    lu.LogInfo("Promoted nodes to master candidate role: %s",
992
               utils.CommaJoin(node.name for node in mod_list))
993
    for name in mod_list:
994
      lu.context.ReaddNode(name)
995
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
996
  if mc_now > mc_max:
997
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
998
               (mc_now, mc_max))
999

    
1000

    
1001
def _DecideSelfPromotion(lu, exceptions=None):
1002
  """Decide whether I should promote myself as a master candidate.
1003

1004
  """
1005
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1006
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1007
  # the new node will increase mc_max by one, so:
1008
  mc_should = min(mc_should + 1, cp_size)
1009
  return mc_now < mc_should
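
# Example (illustrative numbers): with candidate_pool_size=10 and currently
# 3 candidates out of a desired 3, adding this node raises the target to
# min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself.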
1010

    
1011

    
1012
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1013
  """Check that the brigdes needed by a list of nics exist.
1014

1015
  """
1016
  cluster = lu.cfg.GetClusterInfo()
1017
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1018
  brlist = [params[constants.NIC_LINK] for params in paramslist
1019
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1020
  if brlist:
1021
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1022
    result.Raise("Error checking bridges on destination node '%s'" %
1023
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1024

    
1025

    
1026
def _CheckInstanceBridgesExist(lu, instance, node=None):
1027
  """Check that the brigdes needed by an instance exist.
1028

1029
  """
1030
  if node is None:
1031
    node = instance.primary_node
1032
  _CheckNicsBridgesExist(lu, instance.nics, node)
1033

    
1034

    
1035
def _CheckOSVariant(os_obj, name):
1036
  """Check whether an OS name conforms to the os variants specification.
1037

1038
  @type os_obj: L{objects.OS}
1039
  @param os_obj: OS object to check
1040
  @type name: string
1041
  @param name: OS name passed by the user, to check for validity
1042

1043
  """
1044
  if not os_obj.supported_variants:
1045
    return
1046
  try:
1047
    variant = name.split("+", 1)[1]
1048
  except IndexError:
1049
    raise errors.OpPrereqError("OS name must include a variant",
1050
                               errors.ECODE_INVAL)
1051

    
1052
  if variant not in os_obj.supported_variants:
1053
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1054

    
1055

    
1056
def _GetNodeInstancesInner(cfg, fn):
1057
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1058

    
1059

    
1060
def _GetNodeInstances(cfg, node_name):
1061
  """Returns a list of all primary and secondary instances on a node.
1062

1063
  """
1064

    
1065
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1066

    
1067

    
1068
def _GetNodePrimaryInstances(cfg, node_name):
1069
  """Returns primary instances on a node.
1070

1071
  """
1072
  return _GetNodeInstancesInner(cfg,
1073
                                lambda inst: node_name == inst.primary_node)
1074

    
1075

    
1076
def _GetNodeSecondaryInstances(cfg, node_name):
1077
  """Returns secondary instances on a node.
1078

1079
  """
1080
  return _GetNodeInstancesInner(cfg,
1081
                                lambda inst: node_name in inst.secondary_nodes)
1082

    
1083

    
1084
def _GetStorageTypeArgs(cfg, storage_type):
1085
  """Returns the arguments for a storage type.
1086

1087
  """
1088
  # Special case for file storage
1089
  if storage_type == constants.ST_FILE:
1090
    # storage.FileStorage wants a list of storage directories
1091
    return [[cfg.GetFileStorageDir()]]
1092

    
1093
  return []
1094

    
1095

    
1096
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1097
  faulty = []
1098

    
1099
  for dev in instance.disks:
1100
    cfg.SetDiskID(dev, node_name)
1101

    
1102
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1103
  result.Raise("Failed to get disk status from node %s" % node_name,
1104
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1105

    
1106
  for idx, bdev_status in enumerate(result.payload):
1107
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1108
      faulty.append(idx)
1109

    
1110
  return faulty
1111

    
1112

    
1113
class LUPostInitCluster(LogicalUnit):
1114
  """Logical unit for running hooks after cluster initialization.
1115

1116
  """
1117
  HPATH = "cluster-init"
1118
  HTYPE = constants.HTYPE_CLUSTER
1119
  _OP_REQP = []
1120

    
1121
  def BuildHooksEnv(self):
1122
    """Build hooks env.
1123

1124
    """
1125
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1126
    mn = self.cfg.GetMasterNode()
1127
    return env, [], [mn]
1128

    
1129
  def Exec(self, feedback_fn):
1130
    """Nothing to do.
1131

1132
    """
1133
    return True
1134

    
1135

    
1136
class LUDestroyCluster(LogicalUnit):
1137
  """Logical unit for destroying the cluster.
1138

1139
  """
1140
  HPATH = "cluster-destroy"
1141
  HTYPE = constants.HTYPE_CLUSTER
1142
  _OP_REQP = []
1143

    
1144
  def BuildHooksEnv(self):
1145
    """Build hooks env.
1146

1147
    """
1148
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1149
    return env, [], []
1150

    
1151
  def CheckPrereq(self):
1152
    """Check prerequisites.
1153

1154
    This checks whether the cluster is empty.
1155

1156
    Any errors are signaled by raising errors.OpPrereqError.
1157

1158
    """
1159
    master = self.cfg.GetMasterNode()
1160

    
1161
    nodelist = self.cfg.GetNodeList()
1162
    if len(nodelist) != 1 or nodelist[0] != master:
1163
      raise errors.OpPrereqError("There are still %d node(s) in"
1164
                                 " this cluster." % (len(nodelist) - 1),
1165
                                 errors.ECODE_INVAL)
1166
    instancelist = self.cfg.GetInstanceList()
1167
    if instancelist:
1168
      raise errors.OpPrereqError("There are still %d instance(s) in"
1169
                                 " this cluster." % len(instancelist),
1170
                                 errors.ECODE_INVAL)
1171

    
1172
  def Exec(self, feedback_fn):
1173
    """Destroys the cluster.
1174

1175
    """
1176
    master = self.cfg.GetMasterNode()
1177
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1178

    
1179
    # Run post hooks on master node before it's removed
1180
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1181
    try:
1182
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1183
    except:
1184
      # pylint: disable-msg=W0702
1185
      self.LogWarning("Errors occurred running hooks on %s" % master)
1186

    
1187
    result = self.rpc.call_node_stop_master(master, False)
1188
    result.Raise("Could not disable the master role")
1189

    
1190
    if modify_ssh_setup:
1191
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1192
      utils.CreateBackup(priv_key)
1193
      utils.CreateBackup(pub_key)
1194

    
1195
    return master
1196

    
1197

    
1198
def _VerifyCertificate(filename):
1199
  """Verifies a certificate for LUVerifyCluster.
1200

1201
  @type filename: string
1202
  @param filename: Path to PEM file
1203

1204
  """
1205
  try:
1206
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1207
                                           utils.ReadFile(filename))
1208
  except Exception, err: # pylint: disable-msg=W0703
1209
    return (LUVerifyCluster.ETYPE_ERROR,
1210
            "Failed to load X509 certificate %s: %s" % (filename, err))
1211

    
1212
  (errcode, msg) = \
1213
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1214
                                constants.SSL_CERT_EXPIRATION_ERROR)
1215

    
1216
  if msg:
1217
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1218
  else:
1219
    fnamemsg = None
1220

    
1221
  if errcode is None:
1222
    return (None, fnamemsg)
1223
  elif errcode == utils.CERT_WARNING:
1224
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1225
  elif errcode == utils.CERT_ERROR:
1226
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1227

    
1228
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1229

    
1230

    
1231
class LUVerifyCluster(LogicalUnit):
1232
  """Verifies the cluster status.
1233

1234
  """
1235
  HPATH = "cluster-verify"
1236
  HTYPE = constants.HTYPE_CLUSTER
1237
  _OP_REQP = [
1238
    ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1239
    ("verbose", _TBool),
1240
    ("error_codes", _TBool),
1241
    ("debug_simulate_errors", _TBool),
1242
    ]
1243
  REQ_BGL = False
1244

    
1245
  TCLUSTER = "cluster"
1246
  TNODE = "node"
1247
  TINSTANCE = "instance"
1248

    
1249
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1250
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1251
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1252
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1253
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1254
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1256
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1257
  ENODEDRBD = (TNODE, "ENODEDRBD")
1258
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1259
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1260
  ENODEHV = (TNODE, "ENODEHV")
1261
  ENODELVM = (TNODE, "ENODELVM")
1262
  ENODEN1 = (TNODE, "ENODEN1")
1263
  ENODENET = (TNODE, "ENODENET")
1264
  ENODEOS = (TNODE, "ENODEOS")
1265
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1266
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1267
  ENODERPC = (TNODE, "ENODERPC")
1268
  ENODESSH = (TNODE, "ENODESSH")
1269
  ENODEVERSION = (TNODE, "ENODEVERSION")
1270
  ENODESETUP = (TNODE, "ENODESETUP")
1271
  ENODETIME = (TNODE, "ENODETIME")
1272

    
1273
  ETYPE_FIELD = "code"
1274
  ETYPE_ERROR = "ERROR"
1275
  ETYPE_WARNING = "WARNING"
1276

    
1277
  class NodeImage(object):
1278
    """A class representing the logical and physical status of a node.
1279

1280
    @type name: string
1281
    @ivar name: the node name to which this object refers
1282
    @ivar volumes: a structure as returned from
1283
        L{ganeti.backend.GetVolumeList} (runtime)
1284
    @ivar instances: a list of running instances (runtime)
1285
    @ivar pinst: list of configured primary instances (config)
1286
    @ivar sinst: list of configured secondary instances (config)
1287
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1288
        of this node (config)
1289
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1290
    @ivar dfree: free disk, as reported by the node (runtime)
1291
    @ivar offline: the offline status (config)
1292
    @type rpc_fail: boolean
1293
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1294
        not whether the individual keys were correct) (runtime)
1295
    @type lvm_fail: boolean
1296
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1297
    @type hyp_fail: boolean
1298
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1299
    @type ghost: boolean
1300
    @ivar ghost: whether this is a known node or not (config)
1301
    @type os_fail: boolean
1302
    @ivar os_fail: whether the RPC call didn't return valid OS data
1303
    @type oslist: list
1304
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1305

1306
    """
1307
    def __init__(self, offline=False, name=None):
1308
      self.name = name
1309
      self.volumes = {}
1310
      self.instances = []
1311
      self.pinst = []
1312
      self.sinst = []
1313
      self.sbp = {}
1314
      self.mfree = 0
1315
      self.dfree = 0
1316
      self.offline = offline
1317
      self.rpc_fail = False
1318
      self.lvm_fail = False
1319
      self.hyp_fail = False
1320
      self.ghost = False
1321
      self.os_fail = False
1322
      self.oslist = {}
1323

    
1324
  def ExpandNames(self):
1325
    self.needed_locks = {
1326
      locking.LEVEL_NODE: locking.ALL_SET,
1327
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1328
    }
1329
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1330

    
1331
  def _Error(self, ecode, item, msg, *args, **kwargs):
1332
    """Format an error message.
1333

1334
    Based on the opcode's error_codes parameter, either format a
1335
    parseable error code, or a simpler error string.
1336

1337
    This must be called only from Exec and functions called from Exec.
1338

1339
    """
1340
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1341
    itype, etxt = ecode
1342
    # first complete the msg
1343
    if args:
1344
      msg = msg % args
1345
    # then format the whole message
1346
    if self.op.error_codes:
1347
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1348
    else:
1349
      if item:
1350
        item = " " + item
1351
      else:
1352
        item = ""
1353
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1354
    # and finally report it via the feedback_fn
1355
    self._feedback_fn("  - %s" % msg)
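
    # Example (node1.example.com is a made-up node name): with
    # self.op.error_codes set, the line above emits something like
    #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
    # and otherwise the human-readable form
    #   - ERROR: node node1.example.com: unable to check volume groups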
1356

    
1357
  def _ErrorIf(self, cond, *args, **kwargs):
1358
    """Log an error message if the passed condition is True.
1359

1360
    """
1361
    cond = bool(cond) or self.op.debug_simulate_errors
1362
    if cond:
1363
      self._Error(*args, **kwargs)
1364
    # do not mark the operation as failed if it is only a warning
1365
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1366
      self.bad = self.bad or cond
1367

    
1368
  def _VerifyNode(self, ninfo, nresult):
1369
    """Run multiple tests against a node.
1370

1371
    Test list:
1372

1373
      - compares ganeti version
1374
      - checks vg existence and size > 20G
1375
      - checks config file checksum
1376
      - checks ssh to other nodes
1377

1378
    @type ninfo: L{objects.Node}
1379
    @param ninfo: the node to check
1380
    @param nresult: the results from the node
1381
    @rtype: boolean
1382
    @return: whether overall this call was successful (and we can expect
1383
         reasonable values in the response)
1384

1385
    """
1386
    node = ninfo.name
1387
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1388

    
1389
    # main result, nresult should be a non-empty dict
1390
    test = not nresult or not isinstance(nresult, dict)
1391
    _ErrorIf(test, self.ENODERPC, node,
1392
                  "unable to verify node: no data returned")
1393
    if test:
1394
      return False
1395

    
1396
    # compares ganeti version
1397
    local_version = constants.PROTOCOL_VERSION
1398
    remote_version = nresult.get("version", None)
1399
    test = not (remote_version and
1400
                isinstance(remote_version, (list, tuple)) and
1401
                len(remote_version) == 2)
1402
    _ErrorIf(test, self.ENODERPC, node,
1403
             "connection to node returned invalid data")
1404
    if test:
1405
      return False
1406

    
1407
    test = local_version != remote_version[0]
1408
    _ErrorIf(test, self.ENODEVERSION, node,
1409
             "incompatible protocol versions: master %s,"
1410
             " node %s", local_version, remote_version[0])
1411
    if test:
1412
      return False
1413

    
1414
    # node seems compatible, we can actually try to look into its results
1415

    
1416
    # full package version
1417
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1418
                  self.ENODEVERSION, node,
1419
                  "software version mismatch: master %s, node %s",
1420
                  constants.RELEASE_VERSION, remote_version[1],
1421
                  code=self.ETYPE_WARNING)
1422

    
1423
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1424
    if isinstance(hyp_result, dict):
1425
      for hv_name, hv_result in hyp_result.iteritems():
1426
        test = hv_result is not None
1427
        _ErrorIf(test, self.ENODEHV, node,
1428
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1429

    
1430

    
1431
    test = nresult.get(constants.NV_NODESETUP,
1432
                           ["Missing NODESETUP results"])
1433
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1434
             "; ".join(test))
1435

    
1436
    return True
1437

    
1438
  def _VerifyNodeTime(self, ninfo, nresult,
1439
                      nvinfo_starttime, nvinfo_endtime):
1440
    """Check the node time.
1441

1442
    @type ninfo: L{objects.Node}
1443
    @param ninfo: the node to check
1444
    @param nresult: the remote results for the node
1445
    @param nvinfo_starttime: the start time of the RPC call
1446
    @param nvinfo_endtime: the end time of the RPC call
1447

1448
    """
1449
    node = ninfo.name
1450
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1451

    
1452
    ntime = nresult.get(constants.NV_TIME, None)
1453
    try:
1454
      ntime_merged = utils.MergeTime(ntime)
1455
    except (ValueError, TypeError):
1456
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1457
      return
1458

    
1459
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1460
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1461
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1462
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1463
    else:
1464
      ntime_diff = None
1465

    
1466
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1467
             "Node time diverges by at least %s from master node time",
1468
             ntime_diff)
1469

    
1470
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1471
    """Check the node time.
1472

1473
    @type ninfo: L{objects.Node}
1474
    @param ninfo: the node to check
1475
    @param nresult: the remote results for the node
1476
    @param vg_name: the configured VG name
1477

1478
    """
1479
    if vg_name is None:
1480
      return
1481

    
1482
    node = ninfo.name
1483
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1484

    
1485
    # checks vg existence and size > 20G
1486
    vglist = nresult.get(constants.NV_VGLIST, None)
1487
    test = not vglist
1488
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1489
    if not test:
1490
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1491
                                            constants.MIN_VG_SIZE)
1492
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1493

    
1494
    # check pv names
1495
    pvlist = nresult.get(constants.NV_PVLIST, None)
1496
    test = pvlist is None
1497
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1498
    if not test:
1499
      # check that ':' is not present in PV names, since it's a
1500
      # special character for lvcreate (denotes the range of PEs to
1501
      # use on the PV)
1502
      for _, pvname, owner_vg in pvlist:
1503
        test = ":" in pvname
1504
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1505
                 " '%s' of VG '%s'", pvname, owner_vg)
1506

    
1507
  def _VerifyNodeNetwork(self, ninfo, nresult):
1508
    """Check the node time.
1509

1510
    @type ninfo: L{objects.Node}
1511
    @param ninfo: the node to check
1512
    @param nresult: the remote results for the node
1513

1514
    """
1515
    node = ninfo.name
1516
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517

    
1518
    test = constants.NV_NODELIST not in nresult
1519
    _ErrorIf(test, self.ENODESSH, node,
1520
             "node hasn't returned node ssh connectivity data")
1521
    if not test:
1522
      if nresult[constants.NV_NODELIST]:
1523
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1524
          _ErrorIf(True, self.ENODESSH, node,
1525
                   "ssh communication with node '%s': %s", a_node, a_msg)
1526

    
1527
    test = constants.NV_NODENETTEST not in nresult
1528
    _ErrorIf(test, self.ENODENET, node,
1529
             "node hasn't returned node tcp connectivity data")
1530
    if not test:
1531
      if nresult[constants.NV_NODENETTEST]:
1532
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1533
        for anode in nlist:
1534
          _ErrorIf(True, self.ENODENET, node,
1535
                   "tcp communication with node '%s': %s",
1536
                   anode, nresult[constants.NV_NODENETTEST][anode])
1537

    
1538
    test = constants.NV_MASTERIP not in nresult
1539
    _ErrorIf(test, self.ENODENET, node,
1540
             "node hasn't returned node master IP reachability data")
1541
    if not test:
1542
      if not nresult[constants.NV_MASTERIP]:
1543
        if node == self.master_node:
1544
          msg = "the master node cannot reach the master IP (not configured?)"
1545
        else:
1546
          msg = "cannot reach the master IP"
1547
        _ErrorIf(True, self.ENODENET, node, msg)
1548

    
1549

    
1550
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1551
    """Verify an instance.
1552

1553
    This function checks to see if the required block devices are
1554
    available on the instance's node.
1555

1556
    """
1557
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1558
    node_current = instanceconfig.primary_node
1559

    
1560
    node_vol_should = {}
1561
    instanceconfig.MapLVsByNode(node_vol_should)
1562

    
1563
    for node in node_vol_should:
1564
      n_img = node_image[node]
1565
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1566
        # ignore missing volumes on offline or broken nodes
1567
        continue
1568
      for volume in node_vol_should[node]:
1569
        test = volume not in n_img.volumes
1570
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1571
                 "volume %s missing on node %s", volume, node)
1572

    
1573
    if instanceconfig.admin_up:
1574
      pri_img = node_image[node_current]
1575
      test = instance not in pri_img.instances and not pri_img.offline
1576
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1577
               "instance not running on its primary node %s",
1578
               node_current)
1579

    
1580
    for node, n_img in node_image.items():
1581
      if node != node_current:
1582
        test = instance in n_img.instances
1583
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1584
                 "instance should not run on node %s", node)
1585

    
1586
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1587
    """Verify if there are any unknown volumes in the cluster.
1588

1589
    The .os, .swap and backup volumes are ignored. All other volumes are
1590
    reported as unknown.
1591

1592
    """
1593
    for node, n_img in node_image.items():
1594
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1595
        # skip non-healthy nodes
1596
        continue
1597
      for volume in n_img.volumes:
1598
        test = (node not in node_vol_should or
1599
                volume not in node_vol_should[node])
1600
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1601
                      "volume %s is unknown", volume)
1602

    
1603
  def _VerifyOrphanInstances(self, instancelist, node_image):
1604
    """Verify the list of running instances.
1605

1606
    This checks what instances are running but unknown to the cluster.
1607

1608
    """
1609
    for node, n_img in node_image.items():
1610
      for o_inst in n_img.instances:
1611
        test = o_inst not in instancelist
1612
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1613
                      "instance %s on node %s should not exist", o_inst, node)
1614

    
1615
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1616
    """Verify N+1 Memory Resilience.
1617

1618
    Check that if one single node dies we can still start all the
1619
    instances it was primary for.
1620

1621
    """
1622
    for node, n_img in node_image.items():
1623
      # This code checks that every node which is now listed as
1624
      # secondary has enough memory to host all instances it is
1625
      # supposed to should a single other node in the cluster fail.
1626
      # FIXME: not ready for failover to an arbitrary node
1627
      # FIXME: does not support file-backed instances
1628
      # WARNING: we currently take into account down instances as well
1629
      # as up ones, considering that even if they're down someone
1630
      # might want to start them even in the event of a node failure.
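      # (n_img.sbp maps a primary node name to the instances that have
      # this node as one of their secondaries)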
1631
      for prinode, instances in n_img.sbp.items():
1632
        needed_mem = 0
1633
        for instance in instances:
1634
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1635
          if bep[constants.BE_AUTO_BALANCE]:
1636
            needed_mem += bep[constants.BE_MEMORY]
1637
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)
1641

    
1642
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1643
                       master_files):
1644
    """Verifies and computes the node required file checksums.
1645

1646
    @type ninfo: L{objects.Node}
1647
    @param ninfo: the node to check
1648
    @param nresult: the remote results for the node
1649
    @param file_list: required list of files
1650
    @param local_cksum: dictionary of local files and their checksums
1651
    @param master_files: list of files that only masters should have
1652

1653
    """
1654
    node = ninfo.name
1655
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1656

    
1657
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
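    # remote_cksum is expected to map file names to the checksums the node
    # computed for them, mirroring local_cksum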
1658
    test = not isinstance(remote_cksum, dict)
1659
    _ErrorIf(test, self.ENODEFILECHECK, node,
1660
             "node hasn't returned file checksum data")
1661
    if test:
1662
      return
1663

    
1664
    for file_name in file_list:
1665
      node_is_mc = ninfo.master_candidate
1666
      must_have = (file_name not in master_files) or node_is_mc
1667
      # missing
1668
      test1 = file_name not in remote_cksum
1669
      # invalid checksum
1670
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1671
      # existing and good
1672
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1673
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1674
               "file '%s' missing", file_name)
1675
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1676
               "file '%s' has wrong checksum", file_name)
1677
      # not candidate and this is not a must-have file
1678
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1679
               "file '%s' should not exist on non master"
1680
               " candidates (and the file is outdated)", file_name)
1681
      # all good, except non-master/non-must have combination
1682
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1683
               "file '%s' should not exist"
1684
               " on non master candidates", file_name)
1685

    
1686
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.
1688

1689
    @type ninfo: L{objects.Node}
1690
    @param ninfo: the node to check
1691
    @param nresult: the remote results for the node
1692
    @param instanceinfo: the dict of instances
1693
    @param drbd_map: the DRBD map as returned by
1694
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1695

1696
    """
1697
    node = ninfo.name
1698
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1699

    
1700
    # compute the DRBD minors
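    # node_drbd: minor -> (instance name, whether the minor must be active)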
1701
    node_drbd = {}
1702
    for minor, instance in drbd_map[node].items():
1703
      test = instance not in instanceinfo
1704
      _ErrorIf(test, self.ECLUSTERCFG, None,
1705
               "ghost instance '%s' in temporary DRBD map", instance)
1706
        # ghost instance should not be running, but otherwise we
1707
        # don't give double warnings (both ghost instance and
1708
        # unallocated minor in use)
1709
      if test:
1710
        node_drbd[minor] = (instance, False)
1711
      else:
1712
        instance = instanceinfo[instance]
1713
        node_drbd[minor] = (instance.name, instance.admin_up)
1714

    
1715
    # and now check them
1716
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1717
    test = not isinstance(used_minors, (tuple, list))
1718
    _ErrorIf(test, self.ENODEDRBD, node,
1719
             "cannot parse drbd status file: %s", str(used_minors))
1720
    if test:
1721
      # we cannot check drbd status
1722
      return
1723

    
1724
    for minor, (iname, must_exist) in node_drbd.items():
1725
      test = minor not in used_minors and must_exist
1726
      _ErrorIf(test, self.ENODEDRBD, node,
1727
               "drbd minor %d of instance %s is not active", minor, iname)
1728
    for minor in used_minors:
1729
      test = minor not in node_drbd
1730
      _ErrorIf(test, self.ENODEDRBD, node,
1731
               "unallocated drbd minor %d is in use", minor)
1732

    
1733
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1734
    """Builds the node OS structures.
1735

1736
    @type ninfo: L{objects.Node}
1737
    @param ninfo: the node to check
1738
    @param nresult: the remote results for the node
1739
    @param nimg: the node image object
1740

1741
    """
1742
    node = ninfo.name
1743
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1744

    
1745
    remote_os = nresult.get(constants.NV_OSLIST, None)
1746
    test = (not isinstance(remote_os, list) or
1747
            not compat.all(remote_os,
1748
                           lambda v: isinstance(v, list) and len(v) == 7))
1749

    
1750
    _ErrorIf(test, self.ENODEOS, node,
1751
             "node hasn't returned valid OS data")
1752

    
1753
    nimg.os_fail = test
1754

    
1755
    if test:
1756
      return
1757

    
1758
    os_dict = {}
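    # os_dict: OS name -> list of (path, status, diagnose string,
    # variants set, parameters set, API versions set) tuples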
1759

    
1760
    for (name, os_path, status, diagnose,
1761
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1762

    
1763
      if name not in os_dict:
1764
        os_dict[name] = []
1765

    
1766
      # parameters is a list of lists instead of list of tuples due to
1767
      # JSON lacking a real tuple type, fix it:
1768
      parameters = [tuple(v) for v in parameters]
1769
      os_dict[name].append((os_path, status, diagnose,
1770
                            set(variants), set(parameters), set(api_ver)))
1771

    
1772
    nimg.oslist = os_dict
1773

    
1774
  def _VerifyNodeOS(self, ninfo, nimg, base):
1775
    """Verifies the node OS list.
1776

1777
    @type ninfo: L{objects.Node}
1778
    @param ninfo: the node to check
1779
    @param nimg: the node image object
1780
    @param base: the 'template' node we match against (e.g. from the master)
1781

1782
    """
1783
    node = ninfo.name
1784
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1785

    
1786
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1787

    
1788
    for os_name, os_data in nimg.oslist.items():
1789
      assert os_data, "Empty OS status for OS %s?!" % os_name
1790
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1791
      _ErrorIf(not f_status, self.ENODEOS, node,
1792
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1793
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1794
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1795
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1796
      # this will be caught in the backend too
1797
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
1798
               and not f_var, self.ENODEOS, node,
1799
               "OS %s with API at least %d does not declare any variant",
1800
               os_name, constants.OS_API_V15)
1801
      # comparisons with the 'base' image
1802
      test = os_name not in base.oslist
1803
      _ErrorIf(test, self.ENODEOS, node,
1804
               "Extra OS %s not present on reference node (%s)",
1805
               os_name, base.name)
1806
      if test:
1807
        continue
1808
      assert base.oslist[os_name], "Base node has empty OS status?"
1809
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1810
      if not b_status:
1811
        # base OS is invalid, skipping
1812
        continue
1813
      for kind, a, b in [("API version", f_api, b_api),
1814
                         ("variants list", f_var, b_var),
1815
                         ("parameters", f_param, b_param)]:
1816
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))
1820

    
1821
    # check any missing OSes
1822
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1823
    _ErrorIf(missing, self.ENODEOS, node,
1824
             "OSes present on reference node %s but missing on this node: %s",
1825
             base.name, utils.CommaJoin(missing))
1826

    
1827
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1828
    """Verifies and updates the node volume data.
1829

1830
    This function will update a L{NodeImage}'s internal structures
1831
    with data from the remote call.
1832

1833
    @type ninfo: L{objects.Node}
1834
    @param ninfo: the node to check
1835
    @param nresult: the remote results for the node
1836
    @param nimg: the node image object
1837
    @param vg_name: the configured VG name
1838

1839
    """
1840
    node = ninfo.name
1841
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1842

    
1843
    nimg.lvm_fail = True
1844
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
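    # on success lvdata is a dict of LV data; a string means the node
    # reported an LVM error, anything else a failed RPC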
1845
    if vg_name is None:
1846
      pass
1847
    elif isinstance(lvdata, basestring):
1848
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1849
               utils.SafeEncode(lvdata))
1850
    elif not isinstance(lvdata, dict):
1851
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1852
    else:
1853
      nimg.volumes = lvdata
1854
      nimg.lvm_fail = False
1855

    
1856
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1857
    """Verifies and updates the node instance list.
1858

1859
    If the listing was successful, then updates this node's instance
1860
    list. Otherwise, it marks the RPC call as failed for the instance
1861
    list key.
1862

1863
    @type ninfo: L{objects.Node}
1864
    @param ninfo: the node to check
1865
    @param nresult: the remote results for the node
1866
    @param nimg: the node image object
1867

1868
    """
1869
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1870
    test = not isinstance(idata, list)
1871
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1872
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1873
    if test:
1874
      nimg.hyp_fail = True
1875
    else:
1876
      nimg.instances = idata
1877

    
1878
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1879
    """Verifies and computes a node information map
1880

1881
    @type ninfo: L{objects.Node}
1882
    @param ninfo: the node to check
1883
    @param nresult: the remote results for the node
1884
    @param nimg: the node image object
1885
    @param vg_name: the configured VG name
1886

1887
    """
1888
    node = ninfo.name
1889
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1890

    
1891
    # try to read free memory (from the hypervisor)
1892
    hv_info = nresult.get(constants.NV_HVINFO, None)
1893
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1894
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1895
    if not test:
1896
      try:
1897
        nimg.mfree = int(hv_info["memory_free"])
1898
      except (ValueError, TypeError):
1899
        _ErrorIf(True, self.ENODERPC, node,
1900
                 "node returned invalid nodeinfo, check hypervisor")
1901

    
1902
    # FIXME: devise a free space model for file based instances as well
1903
    if vg_name is not None:
1904
      test = (constants.NV_VGLIST not in nresult or
1905
              vg_name not in nresult[constants.NV_VGLIST])
1906
      _ErrorIf(test, self.ENODELVM, node,
1907
               "node didn't return data for the volume group '%s'"
1908
               " - it is either missing or broken", vg_name)
1909
      if not test:
1910
        try:
1911
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1912
        except (ValueError, TypeError):
1913
          _ErrorIf(True, self.ENODERPC, node,
1914
                   "node returned invalid LVM info, check LVM status")
1915

    
1916
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
1923
    all_nodes = self.cfg.GetNodeList()
1924
    env = {
1925
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1926
      }
1927
    for node in self.cfg.GetAllNodesInfo().values():
1928
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1929

    
1930
    return env, [], all_nodes
1931

    
1932
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
1936
    self.bad = False
1937
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1938
    verbose = self.op.verbose
1939
    self._feedback_fn = feedback_fn
1940
    feedback_fn("* Verifying global settings")
1941
    for msg in self.cfg.VerifyConfig():
1942
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1943

    
1944
    # Check the cluster certificates
1945
    for cert_filename in constants.ALL_CERT_FILES:
1946
      (errcode, msg) = _VerifyCertificate(cert_filename)
1947
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1948

    
1949
    vg_name = self.cfg.GetVGName()
1950
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1951
    cluster = self.cfg.GetClusterInfo()
1952
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1953
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1954
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1955
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1956
                        for iname in instancelist)
1957
    i_non_redundant = [] # Non redundant instances
1958
    i_non_a_balanced = [] # Non auto-balanced instances
1959
    n_offline = 0 # Count of offline nodes
1960
    n_drained = 0 # Count of nodes being drained
1961
    node_vol_should = {}
1962

    
1963
    # FIXME: verify OS list
1964
    # do local checksums
1965
    master_files = [constants.CLUSTER_CONF_FILE]
1966
    master_node = self.master_node = self.cfg.GetMasterNode()
1967
    master_ip = self.cfg.GetMasterIP()
1968

    
1969
    file_names = ssconf.SimpleStore().GetFileList()
1970
    file_names.extend(constants.ALL_CERT_FILES)
1971
    file_names.extend(master_files)
1972
    if cluster.modify_etc_hosts:
1973
      file_names.append(constants.ETC_HOSTS)
1974

    
1975
    local_checksums = utils.FingerprintFiles(file_names)
1976

    
1977
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1978
    node_verify_param = {
1979
      constants.NV_FILELIST: file_names,
1980
      constants.NV_NODELIST: [node.name for node in nodeinfo
1981
                              if not node.offline],
1982
      constants.NV_HYPERVISOR: hypervisors,
1983
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1984
                                  node.secondary_ip) for node in nodeinfo
1985
                                 if not node.offline],
1986
      constants.NV_INSTANCELIST: hypervisors,
1987
      constants.NV_VERSION: None,
1988
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1989
      constants.NV_NODESETUP: None,
1990
      constants.NV_TIME: None,
1991
      constants.NV_MASTERIP: (master_node, master_ip),
1992
      constants.NV_OSLIST: None,
1993
      }
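    # each NV_* key above requests one verification from the nodes; the
    # values carry that check's arguments (file list, node/IP pairs, ...)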
1994

    
1995
    if vg_name is not None:
1996
      node_verify_param[constants.NV_VGLIST] = None
1997
      node_verify_param[constants.NV_LVLIST] = vg_name
1998
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1999
      node_verify_param[constants.NV_DRBDLIST] = None
2000

    
2001
    # Build our expected cluster state
2002
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2003
                                                 name=node.name))
2004
                      for node in nodeinfo)
2005

    
2006
    for instance in instancelist:
2007
      inst_config = instanceinfo[instance]
2008

    
2009
      for nname in inst_config.all_nodes:
2010
        if nname not in node_image:
2011
          # ghost node
2012
          gnode = self.NodeImage(name=nname)
2013
          gnode.ghost = True
2014
          node_image[nname] = gnode
2015

    
2016
      inst_config.MapLVsByNode(node_vol_should)
2017

    
2018
      pnode = inst_config.primary_node
2019
      node_image[pnode].pinst.append(instance)
2020

    
2021
      for snode in inst_config.secondary_nodes:
2022
        nimg = node_image[snode]
2023
        nimg.sinst.append(instance)
2024
        if pnode not in nimg.sbp:
2025
          nimg.sbp[pnode] = []
2026
        nimg.sbp[pnode].append(instance)
2027

    
2028
    # At this point, we have the in-memory data structures complete,
2029
    # except for the runtime information, which we'll gather next
2030

    
2031
    # Due to the way our RPC system works, exact response times cannot be
2032
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2033
    # time before and after executing the request, we can at least have a time
2034
    # window.
2035
    nvinfo_starttime = time.time()
2036
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2037
                                           self.cfg.GetClusterName())
2038
    nvinfo_endtime = time.time()
2039

    
2040
    all_drbd_map = self.cfg.ComputeDRBDMap()
2041

    
2042
    feedback_fn("* Verifying node status")
2043

    
2044
    refos_img = None
2045

    
2046
    for node_i in nodeinfo:
2047
      node = node_i.name
2048
      nimg = node_image[node]
2049

    
2050
      if node_i.offline:
2051
        if verbose:
2052
          feedback_fn("* Skipping offline node %s" % (node,))
2053
        n_offline += 1
2054
        continue
2055

    
2056
      if node == master_node:
2057
        ntype = "master"
2058
      elif node_i.master_candidate:
2059
        ntype = "master candidate"
2060
      elif node_i.drained:
2061
        ntype = "drained"
2062
        n_drained += 1
2063
      else:
2064
        ntype = "regular"
2065
      if verbose:
2066
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2067

    
2068
      msg = all_nvinfo[node].fail_msg
2069
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2070
      if msg:
2071
        nimg.rpc_fail = True
2072
        continue
2073

    
2074
      nresult = all_nvinfo[node].payload
2075

    
2076
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2077
      self._VerifyNodeNetwork(node_i, nresult)
2078
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2079
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2080
                            master_files)
2081
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2082
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2083

    
2084
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2085
      self._UpdateNodeInstances(node_i, nresult, nimg)
2086
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2087
      self._UpdateNodeOS(node_i, nresult, nimg)
2088
      if not nimg.os_fail:
2089
        if refos_img is None:
2090
          refos_img = nimg
2091
        self._VerifyNodeOS(node_i, nimg, refos_img)
2092

    
2093
    feedback_fn("* Verifying instance status")
2094
    for instance in instancelist:
2095
      if verbose:
2096
        feedback_fn("* Verifying instance %s" % instance)
2097
      inst_config = instanceinfo[instance]
2098
      self._VerifyInstance(instance, inst_config, node_image)
2099
      inst_nodes_offline = []
2100

    
2101
      pnode = inst_config.primary_node
2102
      pnode_img = node_image[pnode]
2103
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2104
               self.ENODERPC, pnode, "instance %s, connection to"
2105
               " primary node failed", instance)
2106

    
2107
      if pnode_img.offline:
2108
        inst_nodes_offline.append(pnode)
2109

    
2110
      # If the instance is non-redundant we cannot survive losing its primary
2111
      # node, so we are not N+1 compliant. On the other hand we have no disk
2112
      # templates with more than one secondary so that situation is not well
2113
      # supported either.
2114
      # FIXME: does not support file-backed instances
2115
      if not inst_config.secondary_nodes:
2116
        i_non_redundant.append(instance)
2117
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2118
               instance, "instance has multiple secondary nodes: %s",
2119
               utils.CommaJoin(inst_config.secondary_nodes),
2120
               code=self.ETYPE_WARNING)
2121

    
2122
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2123
        i_non_a_balanced.append(instance)
2124

    
2125
      for snode in inst_config.secondary_nodes:
2126
        s_img = node_image[snode]
2127
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2128
                 "instance %s, connection to secondary node failed", instance)
2129

    
2130
        if s_img.offline:
2131
          inst_nodes_offline.append(snode)
2132

    
2133
      # warn that the instance lives on offline nodes
2134
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2135
               "instance lives on offline node(s) %s",
2136
               utils.CommaJoin(inst_nodes_offline))
2137
      # ... or ghost nodes
2138
      for node in inst_config.all_nodes:
2139
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2140
                 "instance lives on ghost node %s", node)
2141

    
2142
    feedback_fn("* Verifying orphan volumes")
2143
    self._VerifyOrphanVolumes(node_vol_should, node_image)
2144

    
2145
    feedback_fn("* Verifying orphan instances")
2146
    self._VerifyOrphanInstances(instancelist, node_image)
2147

    
2148
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2149
      feedback_fn("* Verifying N+1 Memory redundancy")
2150
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2151

    
2152
    feedback_fn("* Other Notes")
2153
    if i_non_redundant:
2154
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2155
                  % len(i_non_redundant))
2156

    
2157
    if i_non_a_balanced:
2158
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2159
                  % len(i_non_a_balanced))
2160

    
2161
    if n_offline:
2162
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2163

    
2164
    if n_drained:
2165
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2166

    
2167
    return not self.bad
2168

    
2169
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2170
    """Analyze the post-hooks' result
2171

2172
    This method analyses the hook result, handles it, and sends some
2173
    nicely-formatted feedback back to the user.
2174

2175
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2176
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2177
    @param hooks_results: the results of the multi-node hooks rpc call
2178
    @param feedback_fn: function used to send feedback back to the caller
2179
    @param lu_result: previous Exec result
2180
    @return: the new Exec result, based on the previous result
2181
        and hook results
2182

2183
    """
2184
    # We only really run POST phase hooks, and are only interested in
2185
    # their results
2186
    if phase == constants.HOOKS_PHASE_POST:
2187
      # Used to change hooks' output to proper indentation
2188
      indent_re = re.compile('^', re.M)
2189
      feedback_fn("* Hooks Results")
2190
      assert hooks_results, "invalid result from hooks"
2191

    
2192
      for node_name in hooks_results:
2193
        res = hooks_results[node_name]
2194
        msg = res.fail_msg
2195
        test = msg and not res.offline
2196
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2197
                      "Communication failure in hooks execution: %s", msg)
2198
        if res.offline or msg:
2199
          # No need to investigate payload if node is offline or gave an error.
2200
          # override manually lu_result here as _ErrorIf only
2201
          # overrides self.bad
2202
          lu_result = 1
2203
          continue
2204
        for script, hkr, output in res.payload:
2205
          test = hkr == constants.HKR_FAIL
2206
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2207
                        "Script %s failed, output:", script)
2208
          if test:
2209
            output = indent_re.sub('      ', output)
2210
            feedback_fn("%s" % output)
2211
            lu_result = 0
2212

    
2213
      return lu_result
2214

    
2215

    
2216
class LUVerifyDisks(NoHooksLU):
2217
  """Verifies the cluster disks status.
2218

2219
  """
2220
  _OP_REQP = []
2221
  REQ_BGL = False
2222

    
2223
  def ExpandNames(self):
2224
    self.needed_locks = {
2225
      locking.LEVEL_NODE: locking.ALL_SET,
2226
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2227
    }
2228
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2229

    
2230
  def Exec(self, feedback_fn):
2231
    """Verify integrity of cluster disks.
2232

2233
    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
2237

2238
    """
2239
    result = res_nodes, res_instances, res_missing = {}, [], {}
2240

    
2241
    vg_name = self.cfg.GetVGName()
2242
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2243
    instances = [self.cfg.GetInstanceInfo(name)
2244
                 for name in self.cfg.GetInstanceList()]
2245

    
2246
    nv_dict = {}
2247
    for inst in instances:
2248
      inst_lvs = {}
2249
      if (not inst.admin_up or
2250
          inst.disk_template not in constants.DTS_NET_MIRROR):
2251
        continue
2252
      inst.MapLVsByNode(inst_lvs)
2253
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2254
      for node, vol_list in inst_lvs.iteritems():
2255
        for vol in vol_list:
2256
          nv_dict[(node, vol)] = inst
2257

    
2258
    if not nv_dict:
2259
      return result
2260

    
2261
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2262

    
2263
    for node in nodes:
2264
      # node_volume
2265
      node_res = node_lvs[node]
2266
      if node_res.offline:
2267
        continue
2268
      msg = node_res.fail_msg
2269
      if msg:
2270
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2271
        res_nodes[node] = msg
2272
        continue
2273

    
2274
      lvs = node_res.payload
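      # each payload entry maps an LV name to a tuple whose third element
      # tells whether the LV is online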
2275
      for lv_name, (_, _, lv_online) in lvs.items():
2276
        inst = nv_dict.pop((node, lv_name), None)
2277
        if (not lv_online and inst is not None
2278
            and inst.name not in res_instances):
2279
          res_instances.append(inst.name)
2280

    
2281
    # any leftover items in nv_dict are missing LVs, let's arrange the
2282
    # data better
2283
    for key, inst in nv_dict.iteritems():
2284
      if inst.name not in res_missing:
2285
        res_missing[inst.name] = []
2286
      res_missing[inst.name].append(key)
2287

    
2288
    return result
2289

    
2290

    
2291
class LURepairDiskSizes(NoHooksLU):
2292
  """Verifies the cluster disks sizes.
2293

2294
  """
2295
  _OP_REQP = [("instances", _TListOf(_TNEString))]
2296
  REQ_BGL = False
2297

    
2298
  def ExpandNames(self):
2299
    if self.op.instances:
2300
      self.wanted_names = []
2301
      for name in self.op.instances:
2302
        full_name = _ExpandInstanceName(self.cfg, name)
2303
        self.wanted_names.append(full_name)
2304
      self.needed_locks = {
2305
        locking.LEVEL_NODE: [],
2306
        locking.LEVEL_INSTANCE: self.wanted_names,
2307
        }
2308
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2309
    else:
2310
      self.wanted_names = None
2311
      self.needed_locks = {
2312
        locking.LEVEL_NODE: locking.ALL_SET,
2313
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2314
        }
2315
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2316

    
2317
  def DeclareLocks(self, level):
2318
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2319
      self._LockInstancesNodes(primary_only=True)
2320

    
2321
  def CheckPrereq(self):
2322
    """Check prerequisites.
2323

2324
    This only checks the optional instance list against the existing names.
2325

2326
    """
2327
    if self.wanted_names is None:
2328
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2329

    
2330
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2331
                             in self.wanted_names]
2332

    
2333
  def _EnsureChildSizes(self, disk):
2334
    """Ensure children of the disk have the needed disk size.
2335

2336
    This is valid mainly for DRBD8 and fixes an issue where the
2337
    children have smaller disk size.
2338

2339
    @param disk: an L{ganeti.objects.Disk} object
2340

2341
    """
2342
    if disk.dev_type == constants.LD_DRBD8:
2343
      assert disk.children, "Empty children for DRBD8?"
2344
      fchild = disk.children[0]
2345
      mismatch = fchild.size < disk.size
2346
      if mismatch:
2347
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2348
                     fchild.size, disk.size)
2349
        fchild.size = disk.size
2350

    
2351
      # and we recurse on this child only, not on the metadev
2352
      return self._EnsureChildSizes(fchild) or mismatch
2353
    else:
2354
      return False
2355

    
2356
  def Exec(self, feedback_fn):
2357
    """Verify the size of cluster disks.
2358

2359
    """
2360
    # TODO: check child disks too
2361
    # TODO: check differences in size between primary/secondary nodes
2362
    per_node_disks = {}
2363
    for instance in self.wanted_instances:
2364
      pnode = instance.primary_node
2365
      if pnode not in per_node_disks:
2366
        per_node_disks[pnode] = []
2367
      for idx, disk in enumerate(instance.disks):
2368
        per_node_disks[pnode].append((instance, idx, disk))
2369

    
2370
    changed = []
2371
    for node, dskl in per_node_disks.items():
2372
      newl = [v[2].Copy() for v in dskl]
2373
      for dsk in newl:
2374
        self.cfg.SetDiskID(dsk, node)
2375
      result = self.rpc.call_blockdev_getsizes(node, newl)
2376
      if result.fail_msg:
2377
        self.LogWarning("Failure in blockdev_getsizes call to node"
2378
                        " %s, ignoring", node)
2379
        continue
2380
      if len(result.data) != len(dskl):
2381
        self.LogWarning("Invalid result from node %s, ignoring node results",
2382
                        node)
2383
        continue
2384
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2385
        if size is None:
2386
          self.LogWarning("Disk %d of instance %s did not return size"
2387
                          " information, ignoring", idx, instance.name)
2388
          continue
2389
        if not isinstance(size, (int, long)):
2390
          self.LogWarning("Disk %d of instance %s did not return valid"
2391
                          " size information, ignoring", idx, instance.name)
2392
          continue
2393
        size = size >> 20
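        # (the node reports the size in bytes; shifting by 20 yields MiB,
        # the unit disk.size is stored in)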
2394
        if size != disk.size:
2395
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2396
                       " correcting: recorded %d, actual %d", idx,
2397
                       instance.name, disk.size, size)
2398
          disk.size = size
2399
          self.cfg.Update(instance, feedback_fn)
2400
          changed.append((instance.name, idx, size))
2401
        if self._EnsureChildSizes(disk):
2402
          self.cfg.Update(instance, feedback_fn)
2403
          changed.append((instance.name, idx, disk.size))
2404
    return changed
2405

    
2406

    
2407
class LURenameCluster(LogicalUnit):
2408
  """Rename the cluster.
2409

2410
  """
2411
  HPATH = "cluster-rename"
2412
  HTYPE = constants.HTYPE_CLUSTER
2413
  _OP_REQP = [("name", _TNEString)]
2414

    
2415
  def BuildHooksEnv(self):
2416
    """Build hooks env.
2417

2418
    """
2419
    env = {
2420
      "OP_TARGET": self.cfg.GetClusterName(),
2421
      "NEW_NAME": self.op.name,
2422
      }
2423
    mn = self.cfg.GetMasterNode()
2424
    all_nodes = self.cfg.GetNodeList()
2425
    return env, [mn], all_nodes
2426

    
2427
  def CheckPrereq(self):
2428
    """Verify that the passed name is a valid one.
2429

2430
    """
2431
    hostname = utils.GetHostInfo(self.op.name)
2432

    
2433
    new_name = hostname.name
2434
    self.ip = new_ip = hostname.ip
2435
    old_name = self.cfg.GetClusterName()
2436
    old_ip = self.cfg.GetMasterIP()
2437
    if new_name == old_name and new_ip == old_ip:
2438
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2439
                                 " cluster has changed",
2440
                                 errors.ECODE_INVAL)
2441
    if new_ip != old_ip:
2442
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2443
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2444
                                   " reachable on the network. Aborting." %
2445
                                   new_ip, errors.ECODE_NOTUNIQUE)
2446

    
2447
    self.op.name = new_name
2448

    
2449
  def Exec(self, feedback_fn):
2450
    """Rename the cluster.
2451

2452
    """
2453
    clustername = self.op.name
2454
    ip = self.ip
2455

    
2456
    # shutdown the master IP
2457
    master = self.cfg.GetMasterNode()
2458
    result = self.rpc.call_node_stop_master(master, False)
2459
    result.Raise("Could not disable the master role")
2460

    
2461
    try:
2462
      cluster = self.cfg.GetClusterInfo()
2463
      cluster.cluster_name = clustername
2464
      cluster.master_ip = ip
2465
      self.cfg.Update(cluster, feedback_fn)
2466

    
2467
      # update the known hosts file
2468
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2469
      node_list = self.cfg.GetNodeList()
2470
      try:
2471
        node_list.remove(master)
2472
      except ValueError:
2473
        pass
2474
      result = self.rpc.call_upload_file(node_list,
2475
                                         constants.SSH_KNOWN_HOSTS_FILE)
2476
      for to_node, to_result in result.iteritems():
2477
        msg = to_result.fail_msg
2478
        if msg:
2479
          msg = ("Copy of file %s to node %s failed: %s" %
2480
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2481
          self.proc.LogWarning(msg)
2482

    
2483
    finally:
2484
      result = self.rpc.call_node_start_master(master, False, False)
2485
      msg = result.fail_msg
2486
      if msg:
2487
        self.LogWarning("Could not re-enable the master role on"
2488
                        " the master, please restart manually: %s", msg)
2489

    
2490

    
2491
def _RecursiveCheckIfLVMBased(disk):
2492
  """Check if the given disk or its children are lvm-based.
2493

2494
  @type disk: L{objects.Disk}
2495
  @param disk: the disk to check
2496
  @rtype: boolean
2497
  @return: boolean indicating whether a LD_LV dev_type was found or not
2498

2499
  """
2500
  if disk.children:
2501
    for chdisk in disk.children:
2502
      if _RecursiveCheckIfLVMBased(chdisk):
2503
        return True
2504
  return disk.dev_type == constants.LD_LV
2505

    
2506

    
2507
class LUSetClusterParams(LogicalUnit):
2508
  """Change the parameters of the cluster.
2509

2510
  """
2511
  HPATH = "cluster-modify"
2512
  HTYPE = constants.HTYPE_CLUSTER
2513
  _OP_REQP = [
2514
    ("hvparams", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2515
    ("os_hvp", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2516
    ("osparams", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2517
    ("enabled_hypervisors",
2518
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2519
    ]
2520
  _OP_DEFS = [
2521
    ("candidate_pool_size", None),
2522
    ("uid_pool", None),
2523
    ("add_uids", None),
2524
    ("remove_uids", None),
2525
    ("hvparams", None),
2526
    ("ov_hvp", None),
2527
    ]
2528
  REQ_BGL = False
2529

    
2530
  def CheckArguments(self):
2531
    """Check parameters
2532

2533
    """
2534
    if self.op.candidate_pool_size is not None:
2535
      try:
2536
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2537
      except (ValueError, TypeError), err:
2538
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2539
                                   str(err), errors.ECODE_INVAL)
2540
      if self.op.candidate_pool_size < 1:
2541
        raise errors.OpPrereqError("At least one master candidate needed",
2542
                                   errors.ECODE_INVAL)
2543

    
2544
    _CheckBooleanOpField(self.op, "maintain_node_health")
2545

    
2546
    if self.op.uid_pool:
2547
      uidpool.CheckUidPool(self.op.uid_pool)
2548

    
2549
    if self.op.add_uids:
2550
      uidpool.CheckUidPool(self.op.add_uids)
2551

    
2552
    if self.op.remove_uids:
2553
      uidpool.CheckUidPool(self.op.remove_uids)
2554

    
2555
  def ExpandNames(self):
2556
    # FIXME: in the future maybe other cluster params won't require checking on
2557
    # all nodes to be modified.
2558
    self.needed_locks = {
2559
      locking.LEVEL_NODE: locking.ALL_SET,
2560
    }
2561
    self.share_locks[locking.LEVEL_NODE] = 1
2562

    
2563
  def BuildHooksEnv(self):
2564
    """Build hooks env.
2565

2566
    """
2567
    env = {
2568
      "OP_TARGET": self.cfg.GetClusterName(),
2569
      "NEW_VG_NAME": self.op.vg_name,
2570
      }
2571
    mn = self.cfg.GetMasterNode()
2572
    return env, [mn], [mn]
2573

    
2574
  def CheckPrereq(self):
2575
    """Check prerequisites.
2576

2577
    This checks whether the given params don't conflict and
2578
    if the given volume group is valid.
2579

2580
    """
2581
    if self.op.vg_name is not None and not self.op.vg_name:
2582
      instances = self.cfg.GetAllInstancesInfo().values()
2583
      for inst in instances:
2584
        for disk in inst.disks:
2585
          if _RecursiveCheckIfLVMBased(disk):
2586
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2587
                                       " lvm-based instances exist",
2588
                                       errors.ECODE_INVAL)
2589

    
2590
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2591

    
2592
    # if vg_name not None, checks given volume group on all nodes
2593
    if self.op.vg_name:
2594
      vglist = self.rpc.call_vg_list(node_list)
2595
      for node in node_list:
2596
        msg = vglist[node].fail_msg
2597
        if msg:
2598
          # ignoring down node
2599
          self.LogWarning("Error while gathering data on node %s"
2600
                          " (ignoring node): %s", node, msg)
2601
          continue
2602
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2603
                                              self.op.vg_name,
2604
                                              constants.MIN_VG_SIZE)
2605
        if vgstatus:
2606
          raise errors.OpPrereqError("Error on node '%s': %s" %
2607
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2608

    
2609
    self.cluster = cluster = self.cfg.GetClusterInfo()
2610
    # validate params changes
2611
    if self.op.beparams:
2612
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2613
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2614

    
2615
    if self.op.nicparams:
2616
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2617
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2618
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2619
      nic_errors = []
2620

    
2621
      # check all instances for consistency
2622
      for instance in self.cfg.GetAllInstancesInfo().values():
2623
        for nic_idx, nic in enumerate(instance.nics):
2624
          params_copy = copy.deepcopy(nic.nicparams)
2625
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2626

    
2627
          # check parameter syntax
2628
          try:
2629
            objects.NIC.CheckParameterSyntax(params_filled)
2630
          except errors.ConfigurationError, err:
2631
            nic_errors.append("Instance %s, nic/%d: %s" %
2632
                              (instance.name, nic_idx, err))
2633

    
2634
          # if we're moving instances to routed, check that they have an ip
2635
          target_mode = params_filled[constants.NIC_MODE]
2636
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2637
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2638
                              (instance.name, nic_idx))
2639
      if nic_errors:
2640
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2641
                                   "\n".join(nic_errors))
2642

    
2643
    # hypervisor list/parameters
2644
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
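    # start from a copy of the current cluster-level hvparams and merge the
    # requested per-hypervisor changes into it below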
2645
    if self.op.hvparams:
2646
      for hv_name, hv_dict in self.op.hvparams.items():
2647
        if hv_name not in self.new_hvparams:
2648
          self.new_hvparams[hv_name] = hv_dict
2649
        else:
2650
          self.new_hvparams[hv_name].update(hv_dict)
2651

    
2652
    # os hypervisor parameters
2653
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2654
    if self.op.os_hvp:
2655
      for os_name, hvs in self.op.os_hvp.items():
2656
        if os_name not in self.new_os_hvp:
2657
          self.new_os_hvp[os_name] = hvs
2658
        else:
2659
          for hv_name, hv_dict in hvs.items():
2660
            if hv_name not in self.new_os_hvp[os_name]:
2661
              self.new_os_hvp[os_name][hv_name] = hv_dict
2662
            else:
2663
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2664

    
2665
    # os parameters
2666
    self.new_osp = objects.FillDict(cluster.osparams, {})
2667
    if self.op.osparams:
2668
      for os_name, osp in self.op.osparams.items():
2669
        if os_name not in self.new_osp:
2670
          self.new_osp[os_name] = {}
2671

    
2672
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2673
                                                  use_none=True)
2674

    
2675
        if not self.new_osp[os_name]:
2676
          # we removed all parameters
2677
          del self.new_osp[os_name]
2678
        else:
2679
          # check the parameter validity (remote check)
2680
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2681
                         os_name, self.new_osp[os_name])
2682

    
2683
    # changes to the hypervisor list
2684
    if self.op.enabled_hypervisors is not None:
2685
      self.hv_list = self.op.enabled_hypervisors
2686
      for hv in self.hv_list:
2687
        # if the hypervisor doesn't already exist in the cluster
2688
        # hvparams, we initialize it to empty, and then (in both
2689
        # cases) we make sure to fill the defaults, as we might not
2690
        # have a complete defaults list if the hypervisor wasn't
2691
        # enabled before
2692
        if hv not in new_hvp:
2693
          new_hvp[hv] = {}
2694
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2695
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2696
    else:
2697
      self.hv_list = cluster.enabled_hypervisors
2698

    
2699
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2700
      # either the enabled list has changed, or the parameters have, validate
2701
      for hv_name, hv_params in self.new_hvparams.items():
2702
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2703
            (self.op.enabled_hypervisors and
2704
             hv_name in self.op.enabled_hypervisors)):
2705
          # either this is a new hypervisor, or its parameters have changed
2706
          hv_class = hypervisor.GetHypervisor(hv_name)
2707
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2708
          hv_class.CheckParameterSyntax(hv_params)
2709
          _CheckHVParams(self, node_list, hv_name, hv_params)
2710

    
2711
    if self.op.os_hvp:
2712
      # no need to check any newly-enabled hypervisors, since the
2713
      # defaults have already been checked in the above code-block
2714
      for os_name, os_hvp in self.new_os_hvp.items():
2715
        for hv_name, hv_params in os_hvp.items():
2716
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2717
          # we need to fill in the new os_hvp on top of the actual hv_p
2718
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2719
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2720
          hv_class = hypervisor.GetHypervisor(hv_name)
2721
          hv_class.CheckParameterSyntax(new_osp)
2722
          _CheckHVParams(self, node_list, hv_name, new_osp)
2723

    
2724

    
2725
  def Exec(self, feedback_fn):
2726
    """Change the parameters of the cluster.
2727

2728
    """
2729
    if self.op.vg_name is not None:
2730
      new_volume = self.op.vg_name
2731
      if not new_volume:
2732
        new_volume = None
2733
      if new_volume != self.cfg.GetVGName():
2734
        self.cfg.SetVGName(new_volume)
2735
      else:
2736
        feedback_fn("Cluster LVM configuration already in desired"
2737
                    " state, not changing")
2738
    if self.op.hvparams:
2739
      self.cluster.hvparams = self.new_hvparams
2740
    if self.op.os_hvp:
2741
      self.cluster.os_hvp = self.new_os_hvp
2742
    if self.op.enabled_hypervisors is not None:
2743
      self.cluster.hvparams = self.new_hvparams
2744
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2745
    if self.op.beparams:
2746
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2747
    if self.op.nicparams:
2748
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2749
    if self.op.osparams:
2750
      self.cluster.osparams = self.new_osp
2751

    
2752
    if self.op.candidate_pool_size is not None:
2753
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2754
      # we need to update the pool size here, otherwise the save will fail
2755
      _AdjustCandidatePool(self, [])
2756

    
2757
    if self.op.maintain_node_health is not None:
2758
      self.cluster.maintain_node_health = self.op.maintain_node_health
2759

    
2760
    if self.op.add_uids is not None:
2761
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2762

    
2763
    if self.op.remove_uids is not None:
2764
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2765

    
2766
    if self.op.uid_pool is not None:
2767
      self.cluster.uid_pool = self.op.uid_pool
2768

    
2769
    self.cfg.Update(self.cluster, feedback_fn)
2770

    
2771

    
2772
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2773
  """Distribute additional files which are part of the cluster configuration.
2774

2775
  ConfigWriter takes care of distributing the config and ssconf files, but
2776
  there are more files which should be distributed to all nodes. This function
2777
  makes sure those are copied.
2778

2779
  @param lu: calling logical unit
2780
  @param additional_nodes: list of nodes not in the config to distribute to
2781

2782
  """
2783
  # 1. Gather target nodes
2784
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2785
  dist_nodes = lu.cfg.GetOnlineNodeList()
2786
  if additional_nodes is not None:
2787
    dist_nodes.extend(additional_nodes)
2788
  if myself.name in dist_nodes:
2789
    dist_nodes.remove(myself.name)
2790

    
2791
  # 2. Gather files to distribute
2792
  dist_files = set([constants.ETC_HOSTS,
2793
                    constants.SSH_KNOWN_HOSTS_FILE,
2794
                    constants.RAPI_CERT_FILE,
2795
                    constants.RAPI_USERS_FILE,
2796
                    constants.CONFD_HMAC_KEY,
2797
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2798
                   ])
2799

    
2800
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2801
  for hv_name in enabled_hypervisors:
2802
    hv_class = hypervisor.GetHypervisor(hv_name)
2803
    dist_files.update(hv_class.GetAncillaryFiles())
2804

    
2805
  # 3. Perform the files upload
2806
  for fname in dist_files:
2807
    if os.path.exists(fname):
2808
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2809
      for to_node, to_result in result.items():
2810
        msg = to_result.fail_msg
2811
        if msg:
2812
          msg = ("Copy of file %s to node %s failed: %s" %
2813
                 (fname, to_node, msg))
2814
          lu.proc.LogWarning(msg)
2815

    
2816

    
2817
class LURedistributeConfig(NoHooksLU):
2818
  """Force the redistribution of cluster configuration.
2819

2820
  This is a very simple LU.
2821

2822
  """
2823
  _OP_REQP = []
2824
  REQ_BGL = False
2825

    
2826
  def ExpandNames(self):
2827
    self.needed_locks = {
2828
      locking.LEVEL_NODE: locking.ALL_SET,
2829
    }
2830
    self.share_locks[locking.LEVEL_NODE] = 1
2831

    
2832
  def Exec(self, feedback_fn):
2833
    """Redistribute the configuration.
2834

2835
    """
2836
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2837
    _RedistributeAncillaryFiles(self)
2838

    
2839

    
2840
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2841
  """Sleep and poll for an instance's disk to sync.
2842

2843
  """
2844
  if not instance.disks or disks is not None and not disks:
2845
    return True
2846

    
2847
  disks = _ExpandCheckDisks(instance, disks)
2848

    
2849
  if not oneshot:
2850
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2851

    
2852
  node = instance.primary_node
2853

    
2854
  for dev in disks:
2855
    lu.cfg.SetDiskID(dev, node)
2856

    
2857
  # TODO: Convert to utils.Retry
2858

    
2859
  retries = 0
2860
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2861
  while True:
2862
    max_time = 0
2863
    done = True
2864
    cumul_degraded = False
2865
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2866
    msg = rstats.fail_msg
2867
    if msg:
2868
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2869
      retries += 1
2870
      if retries >= 10:
2871
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2872
                                 " aborting." % node)
2873
      time.sleep(6)
2874
      continue
2875
    rstats = rstats.payload
2876
    retries = 0
2877
    for i, mstat in enumerate(rstats):
2878
      if mstat is None:
2879
        lu.LogWarning("Can't compute data for node %s/%s",
2880
                           node, disks[i].iv_name)
2881
        continue
2882

    
2883
      cumul_degraded = (cumul_degraded or
2884
                        (mstat.is_degraded and mstat.sync_percent is None))
2885
      if mstat.sync_percent is not None:
2886
        done = False
2887
        if mstat.estimated_time is not None:
2888
          rem_time = ("%s remaining (estimated)" %
2889
                      utils.FormatSeconds(mstat.estimated_time))
2890
          max_time = mstat.estimated_time
2891
        else:
2892
          rem_time = "no time estimate"
2893
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2894
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2895

    
2896
    # if we're done but degraded, let's do a few small retries, to
2897
    # make sure we see a stable and not transient situation; therefore
2898
    # we force restart of the loop
2899
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2900
      logging.info("Degraded disks found, %d retries left", degr_retries)
2901
      degr_retries -= 1
2902
      time.sleep(1)
2903
      continue
2904

    
2905
    if done or oneshot:
2906
      break
2907

    
2908
    time.sleep(min(60, max_time))
2909

    
2910
  if done:
2911
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2912
  return not cumul_degraded
2913

    
2914

    
2915
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
2950
  """Logical unit for OS diagnose/query.
2951

2952
  """
2953
  _OP_REQP = [
2954
    ("output_fields", _TListOf(_TNEString)),
2955
    ("names", _TListOf(_TNEString)),
2956
    ]
2957
  REQ_BGL = False
2958
  _FIELDS_STATIC = utils.FieldSet()
2959
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2960
                                   "parameters", "api_versions")
2961

    
2962
  def CheckArguments(self):
2963
    if self.op.names:
2964
      raise errors.OpPrereqError("Selective OS query not supported",
2965
                                 errors.ECODE_INVAL)
2966

    
2967
    _CheckOutputFields(static=self._FIELDS_STATIC,
2968
                       dynamic=self._FIELDS_DYNAMIC,
2969
                       selected=self.op.output_fields)
2970

    
2971
  def ExpandNames(self):
2972
    # Lock all nodes, in shared mode
2973
    # Temporary removal of locks, should be reverted later
2974
    # TODO: reintroduce locks when they are lighter-weight
2975
    self.needed_locks = {}
2976
    #self.share_locks[locking.LEVEL_NODE] = 1
2977
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2978

    
2979
  @staticmethod
2980
  def _DiagnoseByOS(rlist):
2981
    """Remaps a per-node return list into an a per-os per-node dictionary
2982

2983
    @param rlist: a map with node names as keys and OS objects as values
2984

2985
    @rtype: dict
2986
    @return: a dictionary with osnames as keys and as value another
2987
        map, with nodes as keys and tuples of (path, status, diagnose,
2988
        variants, parameters, api_versions) as values, eg::
2989

2990
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991
                                     (/srv/..., False, "invalid api")],
2992
                           "node2": [(/srv/..., True, "", [], [])]}
2993
          }
2994

2995
    """
2996
    all_os = {}
2997
    # we build here the list of nodes that didn't fail the RPC (at RPC
2998
    # level), so that nodes with a non-responding node daemon don't
2999
    # make all OSes invalid
3000
    good_nodes = [node_name for node_name in rlist
3001
                  if not rlist[node_name].fail_msg]
3002
    for node_name, nr in rlist.items():
3003
      if nr.fail_msg or not nr.payload:
3004
        continue
3005
      for (name, path, status, diagnose, variants,
3006
           params, api_versions) in nr.payload:
3007
        if name not in all_os:
3008
          # build a list of nodes for this os containing empty lists
3009
          # for each node in node_list
3010
          all_os[name] = {}
3011
          for nname in good_nodes:
3012
            all_os[name][nname] = []
3013
        # convert params from [name, help] to (name, help)
3014
        params = [tuple(v) for v in params]
3015
        all_os[name][node_name].append((path, status, diagnose,
3016
                                        variants, params, api_versions))
3017
    return all_os
3018

    
3019
  def Exec(self, feedback_fn):
3020
    """Compute the list of OSes.
3021

3022
    """
3023
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3025
    pol = self._DiagnoseByOS(node_data)
3026
    output = []
3027

    
3028
    for os_name, os_data in pol.items():
3029
      row = []
3030
      valid = True
3031
      (variants, params, api_versions) = null_state = (set(), set(), set())
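      # The per-node variant/parameter/API-version sets are intersected
      # below, so only values that every node agrees on are reported; an OS
      # that is invalid on any node keeps the empty sets from null_state.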
3032
      for idx, osl in enumerate(os_data.values()):
3033
        valid = bool(valid and osl and osl[0][1])
3034
        if not valid:
3035
          (variants, params, api_versions) = null_state
3036
          break
3037
        node_variants, node_params, node_api = osl[0][3:6]
3038
        if idx == 0: # first entry
3039
          variants = set(node_variants)
3040
          params = set(node_params)
3041
          api_versions = set(node_api)
3042
        else: # keep consistency
3043
          variants.intersection_update(node_variants)
3044
          params.intersection_update(node_params)
3045
          api_versions.intersection_update(node_api)
3046

    
3047
      for field in self.op.output_fields:
3048
        if field == "name":
3049
          val = os_name
3050
        elif field == "valid":
3051
          val = valid
3052
        elif field == "node_status":
3053
          # this is just a copy of the dict
3054
          val = {}
3055
          for node_name, nos_list in os_data.items():
3056
            val[node_name] = nos_list
3057
        elif field == "variants":
3058
          val = list(variants)
3059
        elif field == "parameters":
3060
          val = list(params)
3061
        elif field == "api_versions":
3062
          val = list(api_versions)
3063
        else:
3064
          raise errors.ParameterError(field)
3065
        row.append(val)
3066
      output.append(row)
3067

    
3068
    return output


class LURemoveNode(LogicalUnit):
3072
  """Logical unit for removing a node.
3073

3074
  """
3075
  HPATH = "node-remove"
3076
  HTYPE = constants.HTYPE_NODE
3077
  _OP_REQP = [("node_name", _TNEString)]
3078

    
3079
  def BuildHooksEnv(self):
3080
    """Build hooks env.
3081

3082
    This doesn't run on the target node in the pre phase as a failed
3083
    node would then be impossible to remove.
3084

3085
    """
3086
    env = {
3087
      "OP_TARGET": self.op.node_name,
3088
      "NODE_NAME": self.op.node_name,
3089
      }
3090
    all_nodes = self.cfg.GetNodeList()
3091
    try:
3092
      all_nodes.remove(self.op.node_name)
3093
    except ValueError:
3094
      logging.warning("Node %s which is about to be removed not found"
3095
                      " in the all nodes list", self.op.node_name)
3096
    return env, all_nodes, all_nodes
3097

    
3098
  def CheckPrereq(self):
3099
    """Check prerequisites.
3100

3101
    This checks:
3102
     - the node exists in the configuration
3103
     - it does not have primary or secondary instances
3104
     - it's not the master
3105

3106
    Any errors are signaled by raising errors.OpPrereqError.
3107

3108
    """
3109
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3110
    node = self.cfg.GetNodeInfo(self.op.node_name)
3111
    assert node is not None
3112

    
3113
    instance_list = self.cfg.GetInstanceList()
3114

    
3115
    masternode = self.cfg.GetMasterNode()
3116
    if node.name == masternode:
3117
      raise errors.OpPrereqError("Node is the master node,"
3118
                                 " you need to failover first.",
3119
                                 errors.ECODE_INVAL)
3120

    
3121
    for instance_name in instance_list:
3122
      instance = self.cfg.GetInstanceInfo(instance_name)
3123
      if node.name in instance.all_nodes:
3124
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3125
                                   " please remove first." % instance_name,
3126
                                   errors.ECODE_INVAL)
3127
    self.op.node_name = node.name
3128
    self.node = node
3129

    
3130
  def Exec(self, feedback_fn):
3131
    """Removes the node from the cluster.
3132

3133
    """
3134
    node = self.node
3135
    logging.info("Stopping the node daemon and removing configs from node %s",
3136
                 node.name)
3137

    
3138
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3139

    
3140
    # Promote nodes to master candidate as needed
3141
    _AdjustCandidatePool(self, exceptions=[node.name])
3142
    self.context.RemoveNode(node.name)
3143

    
3144
    # Run post hooks on the node before it's removed
3145
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3146
    try:
3147
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3148
    except:
3149
      # pylint: disable-msg=W0702
3150
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3151

    
3152
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3153
    msg = result.fail_msg
3154
    if msg:
3155
      self.LogWarning("Errors encountered on the remote node while leaving"
3156
                      " the cluster: %s", msg)
3157

    
3158
    # Remove node from our /etc/hosts
3159
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3160
      # FIXME: this should be done via an rpc call to node daemon
3161
      utils.RemoveHostFromEtcHosts(node.name)
3162
      _RedistributeAncillaryFiles(self)
3163

    
3164

    
3165
class LUQueryNodes(NoHooksLU):
3166
  """Logical unit for querying nodes.
3167

3168
  """
3169
  # pylint: disable-msg=W0142
3170
  _OP_REQP = [
3171
    ("output_fields", _TListOf(_TNEString)),
3172
    ("names", _TListOf(_TNEString)),
3173
    ("use_locking", _TBool),
3174
    ]
3175
  REQ_BGL = False
3176

    
3177
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3178
                    "master_candidate", "offline", "drained"]
3179

    
3180
  _FIELDS_DYNAMIC = utils.FieldSet(
3181
    "dtotal", "dfree",
3182
    "mtotal", "mnode", "mfree",
3183
    "bootid",
3184
    "ctotal", "cnodes", "csockets",
3185
    )
3186

    
3187
  _FIELDS_STATIC = utils.FieldSet(*[
3188
    "pinst_cnt", "sinst_cnt",
3189
    "pinst_list", "sinst_list",
3190
    "pip", "sip", "tags",
3191
    "master",
3192
    "role"] + _SIMPLE_FIELDS
3193
    )
3194

    
3195
  def CheckArguments(self):
3196
    _CheckOutputFields(static=self._FIELDS_STATIC,
3197
                       dynamic=self._FIELDS_DYNAMIC,
3198
                       selected=self.op.output_fields)
3199

    
3200
  def ExpandNames(self):
3201
    self.needed_locks = {}
3202
    self.share_locks[locking.LEVEL_NODE] = 1
3203

    
3204
    if self.op.names:
3205
      self.wanted = _GetWantedNodes(self, self.op.names)
3206
    else:
3207
      self.wanted = locking.ALL_SET
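    # Node locks are only needed when non-static fields were requested and
    # the caller asked for locking; queries for static fields alone are
    # answered from the configuration without contacting the nodes.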
3208

    
3209
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3210
    self.do_locking = self.do_node_query and self.op.use_locking
3211
    if self.do_locking:
3212
      # if we don't request only static fields, we need to lock the nodes
3213
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3214

    
3215
  def Exec(self, feedback_fn):
3216
    """Computes the list of nodes and their attributes.
3217

3218
    """
3219
    all_info = self.cfg.GetAllNodesInfo()
3220
    if self.do_locking:
3221
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3222
    elif self.wanted != locking.ALL_SET:
3223
      nodenames = self.wanted
3224
      missing = set(nodenames).difference(all_info.keys())
3225
      if missing:
3226
        raise errors.OpExecError(
3227
          "Some nodes were removed before retrieving their data: %s" % missing)
3228
    else:
3229
      nodenames = all_info.keys()
3230

    
3231
    nodenames = utils.NiceSort(nodenames)
3232
    nodelist = [all_info[name] for name in nodenames]
3233

    
3234
    # begin data gathering
3235

    
3236
    if self.do_node_query:
3237
      live_data = {}
3238
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3239
                                          self.cfg.GetHypervisorType())
3240
      for name in nodenames:
3241
        nodeinfo = node_data[name]
3242
        if not nodeinfo.fail_msg and nodeinfo.payload:
3243
          nodeinfo = nodeinfo.payload
3244
          fn = utils.TryConvert
3245
          live_data[name] = {
3246
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3247
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3248
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3249
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3250
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3251
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3252
            "bootid": nodeinfo.get('bootid', None),
3253
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3254
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3255
            }
3256
        else:
3257
          live_data[name] = {}
3258
    else:
3259
      live_data = dict.fromkeys(nodenames, {})
3260

    
3261
    node_to_primary = dict([(name, set()) for name in nodenames])
3262
    node_to_secondary = dict([(name, set()) for name in nodenames])
3263

    
3264
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3265
                             "sinst_cnt", "sinst_list"))
3266
    if inst_fields & frozenset(self.op.output_fields):
3267
      inst_data = self.cfg.GetAllInstancesInfo()
3268

    
3269
      for inst in inst_data.values():
3270
        if inst.primary_node in node_to_primary:
3271
          node_to_primary[inst.primary_node].add(inst.name)
3272
        for secnode in inst.secondary_nodes:
3273
          if secnode in node_to_secondary:
3274
            node_to_secondary[secnode].add(inst.name)
3275

    
3276
    master_node = self.cfg.GetMasterNode()
3277

    
3278
    # end data gathering
3279

    
3280
    output = []
3281
    for node in nodelist:
3282
      node_output = []
3283
      for field in self.op.output_fields:
3284
        if field in self._SIMPLE_FIELDS:
3285
          val = getattr(node, field)
3286
        elif field == "pinst_list":
3287
          val = list(node_to_primary[node.name])
3288
        elif field == "sinst_list":
3289
          val = list(node_to_secondary[node.name])
3290
        elif field == "pinst_cnt":
3291
          val = len(node_to_primary[node.name])
3292
        elif field == "sinst_cnt":
3293
          val = len(node_to_secondary[node.name])
3294
        elif field == "pip":
3295
          val = node.primary_ip
3296
        elif field == "sip":
3297
          val = node.secondary_ip
3298
        elif field == "tags":
3299
          val = list(node.GetTags())
3300
        elif field == "master":
3301
          val = node.name == master_node
3302
        elif self._FIELDS_DYNAMIC.Matches(field):
3303
          val = live_data[node.name].get(field, None)
3304
        elif field == "role":
3305
          if node.name == master_node:
3306
            val = "M"
3307
          elif node.master_candidate:
3308
            val = "C"
3309
          elif node.drained:
3310
            val = "D"
3311
          elif node.offline:
3312
            val = "O"
3313
          else:
3314
            val = "R"
3315
        else:
3316
          raise errors.ParameterError(field)
3317
        node_output.append(val)
3318
      output.append(node_output)
3319

    
3320
    return output


class LUQueryNodeVolumes(NoHooksLU):
3324
  """Logical unit for getting volumes on node(s).
3325

3326
  """
3327
  _OP_REQP = [
3328
    ("nodes", _TListOf(_TNEString)),
3329
    ("output_fields", _TListOf(_TNEString)),
3330
    ]
3331
  REQ_BGL = False
3332
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3333
  _FIELDS_STATIC = utils.FieldSet("node")
3334

    
3335
  def CheckArguments(self):
3336
    _CheckOutputFields(static=self._FIELDS_STATIC,
3337
                       dynamic=self._FIELDS_DYNAMIC,
3338
                       selected=self.op.output_fields)
3339

    
3340
  def ExpandNames(self):
3341
    self.needed_locks = {}
3342
    self.share_locks[locking.LEVEL_NODE] = 1
3343
    if not self.op.nodes:
3344
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3345
    else:
3346
      self.needed_locks[locking.LEVEL_NODE] = \
3347
        _GetWantedNodes(self, self.op.nodes)
3348

    
3349
  def Exec(self, feedback_fn):
3350
    """Computes the list of nodes and their attributes.
3351

3352
    """
3353
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3354
    volumes = self.rpc.call_node_volumes(nodenames)
3355

    
3356
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3357
             in self.cfg.GetInstanceList()]
3358

    
3359
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3360

    
3361
    output = []
3362
    for node in nodenames:
3363
      nresult = volumes[node]
3364
      if nresult.offline:
3365
        continue
3366
      msg = nresult.fail_msg
3367
      if msg:
3368
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3369
        continue
3370

    
3371
      node_vols = nresult.payload[:]
3372
      node_vols.sort(key=lambda vol: vol['dev'])
3373

    
3374
      for vol in node_vols:
3375
        node_output = []
3376
        for field in self.op.output_fields:
3377
          if field == "node":
3378
            val = node
3379
          elif field == "phys":
3380
            val = vol['dev']
3381
          elif field == "vg":
3382
            val = vol['vg']
3383
          elif field == "name":
3384
            val = vol['name']
3385
          elif field == "size":
3386
            val = int(float(vol['size']))
3387
          elif field == "instance":
3388
            for inst in ilist:
3389
              if node not in lv_by_node[inst]:
3390
                continue
3391
              if vol['name'] in lv_by_node[inst][node]:
3392
                val = inst.name
3393
                break
3394
            else:
3395
              val = '-'
3396
          else:
3397
            raise errors.ParameterError(field)
3398
          node_output.append(str(val))
3399

    
3400
        output.append(node_output)
3401

    
3402
    return output
3403

    
3404

    
3405
class LUQueryNodeStorage(NoHooksLU):
3406
  """Logical unit for getting information on storage units on node(s).
3407

3408
  """
3409
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3410
  _OP_REQP = [
3411
    ("nodes", _TListOf(_TNEString)),
3412
    ("storage_type", _CheckStorageType),
3413
    ("output_fields", _TListOf(_TNEString)),
3414
    ]
3415
  _OP_DEFS = [("name", None)]
3416
  REQ_BGL = False
3417

    
3418
  def CheckArguments(self):
3419
    _CheckOutputFields(static=self._FIELDS_STATIC,
3420
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3421
                       selected=self.op.output_fields)
3422

    
3423
  def ExpandNames(self):
3424
    self.needed_locks = {}
3425
    self.share_locks[locking.LEVEL_NODE] = 1
3426

    
3427
    if self.op.nodes:
3428
      self.needed_locks[locking.LEVEL_NODE] = \
3429
        _GetWantedNodes(self, self.op.nodes)
3430
    else:
3431
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3432

    
3433
  def Exec(self, feedback_fn):
3434
    """Computes the list of nodes and their attributes.
3435

3436
    """
3437
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3438

    
3439
    # Always get name to sort by
3440
    if constants.SF_NAME in self.op.output_fields:
3441
      fields = self.op.output_fields[:]
3442
    else:
3443
      fields = [constants.SF_NAME] + self.op.output_fields
3444

    
3445
    # Never ask for node or type as it's only known to the LU
3446
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3447
      while extra in fields:
3448
        fields.remove(extra)
3449

    
3450
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3451
    name_idx = field_idx[constants.SF_NAME]
3452

    
3453
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
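    # A single RPC queries all nodes; each node returns one row per storage
    # unit, with the columns in the same order as the field list built above.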
3454
    data = self.rpc.call_storage_list(self.nodes,
3455
                                      self.op.storage_type, st_args,
3456
                                      self.op.name, fields)
3457

    
3458
    result = []
3459

    
3460
    for node in utils.NiceSort(self.nodes):
3461
      nresult = data[node]
3462
      if nresult.offline:
3463
        continue
3464

    
3465
      msg = nresult.fail_msg
3466
      if msg:
3467
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3468
        continue
3469

    
3470
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3471

    
3472
      for name in utils.NiceSort(rows.keys()):
3473
        row = rows[name]
3474

    
3475
        out = []
3476

    
3477
        for field in self.op.output_fields:
3478
          if field == constants.SF_NODE:
3479
            val = node
3480
          elif field == constants.SF_TYPE:
3481
            val = self.op.storage_type
3482
          elif field in field_idx:
3483
            val = row[field_idx[field]]
3484
          else:
3485
            raise errors.ParameterError(field)
3486

    
3487
          out.append(val)
3488

    
3489
        result.append(out)
3490

    
3491
    return result
3492

    
3493

    
3494
class LUModifyNodeStorage(NoHooksLU):
3495
  """Logical unit for modifying a storage volume on a node.
3496

3497
  """
3498
  _OP_REQP = [
3499
    ("node_name", _TNEString),
3500
    ("storage_type", _CheckStorageType),
3501
    ("name", _TNEString),
3502
    ("changes", _TDict),
3503
    ]
3504
  REQ_BGL = False
3505

    
3506
  def CheckArguments(self):
3507
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3508

    
3509
    storage_type = self.op.storage_type
3510

    
3511
    try:
3512
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3513
    except KeyError:
3514
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3515
                                 " modified" % storage_type,
3516
                                 errors.ECODE_INVAL)
3517

    
3518
    diff = set(self.op.changes.keys()) - modifiable
3519
    if diff:
3520
      raise errors.OpPrereqError("The following fields can not be modified for"
3521
                                 " storage units of type '%s': %r" %
3522
                                 (storage_type, list(diff)),
3523
                                 errors.ECODE_INVAL)
3524

    
3525
  def ExpandNames(self):
3526
    self.needed_locks = {
3527
      locking.LEVEL_NODE: self.op.node_name,
3528
      }
3529

    
3530
  def Exec(self, feedback_fn):
3531
    """Computes the list of nodes and their attributes.
3532

3533
    """
3534
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3535
    result = self.rpc.call_storage_modify(self.op.node_name,
3536
                                          self.op.storage_type, st_args,
3537
                                          self.op.name, self.op.changes)
3538
    result.Raise("Failed to modify storage unit '%s' on %s" %
3539
                 (self.op.name, self.op.node_name))
3540

    
3541

    
3542
class LUAddNode(LogicalUnit):
3543
  """Logical unit for adding node to the cluster.
3544

3545
  """
3546
  HPATH = "node-add"
3547
  HTYPE = constants.HTYPE_NODE
3548
  _OP_REQP = [
3549
    ("node_name", _TNEString),
3550
    ]
3551
  _OP_DEFS = [("secondary_ip", None)]
3552

    
3553
  def CheckArguments(self):
3554
    # validate/normalize the node name
3555
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3556

    
3557
  def BuildHooksEnv(self):
3558
    """Build hooks env.
3559

3560
    This will run on all nodes before, and on all nodes + the new node after.
3561

3562
    """
3563
    env = {
3564
      "OP_TARGET": self.op.node_name,
3565
      "NODE_NAME": self.op.node_name,
3566
      "NODE_PIP": self.op.primary_ip,
3567
      "NODE_SIP": self.op.secondary_ip,
3568
      }
3569
    nodes_0 = self.cfg.GetNodeList()
3570
    nodes_1 = nodes_0 + [self.op.node_name, ]
3571
    return env, nodes_0, nodes_1
3572

    
3573
  def CheckPrereq(self):
3574
    """Check prerequisites.
3575

3576
    This checks:
3577
     - the new node is not already in the config
3578
     - it is resolvable
3579
     - its parameters (single/dual homed) matches the cluster
3580

3581
    Any errors are signaled by raising errors.OpPrereqError.
3582

3583
    """
3584
    node_name = self.op.node_name
3585
    cfg = self.cfg
3586

    
3587
    dns_data = utils.GetHostInfo(node_name)
3588

    
3589
    node = dns_data.name
3590
    primary_ip = self.op.primary_ip = dns_data.ip
3591
    if self.op.secondary_ip is None:
3592
      self.op.secondary_ip = primary_ip
3593
    if not utils.IsValidIP(self.op.secondary_ip):
3594
      raise errors.OpPrereqError("Invalid secondary IP given",
3595
                                 errors.ECODE_INVAL)
3596
    secondary_ip = self.op.secondary_ip
3597

    
3598
    node_list = cfg.GetNodeList()
3599
    if not self.op.readd and node in node_list:
3600
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3601
                                 node, errors.ECODE_EXISTS)
3602
    elif self.op.readd and node not in node_list:
3603
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3604
                                 errors.ECODE_NOENT)
3605

    
3606
    self.changed_primary_ip = False
3607

    
3608
    for existing_node_name in node_list:
3609
      existing_node = cfg.GetNodeInfo(existing_node_name)
3610

    
3611
      if self.op.readd and node == existing_node_name:
3612
        if existing_node.secondary_ip != secondary_ip:
3613
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3614
                                     " address configuration as before",
3615
                                     errors.ECODE_INVAL)
3616
        if existing_node.primary_ip != primary_ip:
3617
          self.changed_primary_ip = True
3618

    
3619
        continue
3620

    
3621
      if (existing_node.primary_ip == primary_ip or
3622
          existing_node.secondary_ip == primary_ip or
3623
          existing_node.primary_ip == secondary_ip or
3624
          existing_node.secondary_ip == secondary_ip):
3625
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3626
                                   " existing node %s" % existing_node.name,
3627
                                   errors.ECODE_NOTUNIQUE)
3628

    
3629
    # check that the type of the node (single versus dual homed) is the
3630
    # same as for the master
3631
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3632
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3633
    newbie_singlehomed = secondary_ip == primary_ip
3634
    if master_singlehomed != newbie_singlehomed:
3635
      if master_singlehomed:
3636
        raise errors.OpPrereqError("The master has no private ip but the"
3637
                                   " new node has one",
3638
                                   errors.ECODE_INVAL)
3639
      else:
3640
        raise errors.OpPrereqError("The master has a private ip but the"
3641
                                   " new node doesn't have one",
3642
                                   errors.ECODE_INVAL)
3643

    
3644
    # checks reachability
3645
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3646
      raise errors.OpPrereqError("Node not reachable by ping",
3647
                                 errors.ECODE_ENVIRON)
3648

    
3649
    if not newbie_singlehomed:
3650
      # check reachability from my secondary ip to newbie's secondary ip
3651
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3652
                           source=myself.secondary_ip):
3653
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3654
                                   " based ping to noded port",
3655
                                   errors.ECODE_ENVIRON)
3656

    
3657
    if self.op.readd:
3658
      exceptions = [node]
3659
    else:
3660
      exceptions = []
3661

    
3662
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3663

    
3664
    if self.op.readd:
3665
      self.new_node = self.cfg.GetNodeInfo(node)
3666
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3667
    else:
3668
      self.new_node = objects.Node(name=node,
3669
                                   primary_ip=primary_ip,
3670
                                   secondary_ip=secondary_ip,
3671
                                   master_candidate=self.master_candidate,
3672
                                   offline=False, drained=False)
3673

    
3674
  def Exec(self, feedback_fn):
3675
    """Adds the new node to the cluster.
3676

3677
    """
3678
    new_node = self.new_node
3679
    node = new_node.name
3680

    
3681
    # for re-adds, reset the offline/drained/master-candidate flags;
3682
    # we need to reset here, otherwise offline would prevent RPC calls
3683
    # later in the procedure; this also means that if the re-add
3684
    # fails, we are left with a non-offlined, broken node
3685
    if self.op.readd:
3686
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3687
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3688
      # if we demote the node, we do cleanup later in the procedure
3689
      new_node.master_candidate = self.master_candidate
3690
      if self.changed_primary_ip:
3691
        new_node.primary_ip = self.op.primary_ip
3692

    
3693
    # notify the user about any possible mc promotion
3694
    if new_node.master_candidate:
3695
      self.LogInfo("Node will be a master candidate")
3696

    
3697
    # check connectivity
3698
    result = self.rpc.call_version([node])[node]
3699
    result.Raise("Can't get version information from node %s" % node)
3700
    if constants.PROTOCOL_VERSION == result.payload:
3701
      logging.info("Communication to node %s fine, sw version %s match",
3702
                   node, result.payload)
3703
    else:
3704
      raise errors.OpExecError("Version mismatch master version %s,"
3705
                               " node version %s" %
3706
                               (constants.PROTOCOL_VERSION, result.payload))
3707

    
3708
    # setup ssh on node
3709
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3710
      logging.info("Copy ssh key to node %s", node)
3711
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
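      # The host DSA/RSA keypairs and the Ganeti user's SSH keypair are read
      # locally and pushed to the new node via the node_add RPC below.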
3712
      keyarray = []
3713
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3714
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3715
                  priv_key, pub_key]
3716

    
3717
      for i in keyfiles:
3718
        keyarray.append(utils.ReadFile(i))
3719

    
3720
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3721
                                      keyarray[2], keyarray[3], keyarray[4],
3722
                                      keyarray[5])
3723
      result.Raise("Cannot transfer ssh keys to the new node")
3724

    
3725
    # Add node to our /etc/hosts, and add key to known_hosts
3726
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3727
      # FIXME: this should be done via an rpc call to node daemon
3728
      utils.AddHostToEtcHosts(new_node.name)
3729

    
3730
    if new_node.secondary_ip != new_node.primary_ip:
3731
      result = self.rpc.call_node_has_ip_address(new_node.name,
3732
                                                 new_node.secondary_ip)
3733
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3734
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3735
      if not result.payload:
3736
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3737
                                 " you gave (%s). Please fix and re-run this"
3738
                                 " command." % new_node.secondary_ip)
3739

    
3740
    node_verify_list = [self.cfg.GetMasterNode()]
3741
    node_verify_param = {
3742
      constants.NV_NODELIST: [node],
3743
      # TODO: do a node-net-test as well?
3744
    }
3745

    
3746
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3747
                                       self.cfg.GetClusterName())
3748
    for verifier in node_verify_list:
3749
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3750
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3751
      if nl_payload:
3752
        for failed in nl_payload:
3753
          feedback_fn("ssh/hostname verification failed"
3754
                      " (checking from %s): %s" %
3755
                      (verifier, nl_payload[failed]))
3756
        raise errors.OpExecError("ssh/hostname verification failed.")
3757

    
3758
    if self.op.readd:
3759
      _RedistributeAncillaryFiles(self)
3760
      self.context.ReaddNode(new_node)
3761
      # make sure we redistribute the config
3762
      self.cfg.Update(new_node, feedback_fn)
3763
      # and make sure the new node will not have old files around
3764
      if not new_node.master_candidate:
3765
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3766
        msg = result.fail_msg
3767
        if msg:
3768
          self.LogWarning("Node failed to demote itself from master"
3769
                          " candidate status: %s" % msg)
3770
    else:
3771
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3772
      self.context.AddNode(new_node, self.proc.GetECId())
3773

    
3774

    
3775
class LUSetNodeParams(LogicalUnit):
3776
  """Modifies the parameters of a node.
3777

3778
  """
3779
  HPATH = "node-modify"
3780
  HTYPE = constants.HTYPE_NODE
3781
  _OP_REQP = [("node_name", _TNEString)]
3782
  REQ_BGL = False
3783

    
3784
  def CheckArguments(self):
3785
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3786
    _CheckBooleanOpField(self.op, 'master_candidate')
3787
    _CheckBooleanOpField(self.op, 'offline')
3788
    _CheckBooleanOpField(self.op, 'drained')
3789
    _CheckBooleanOpField(self.op, 'auto_promote')
3790
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3791
    if all_mods.count(None) == 3:
3792
      raise errors.OpPrereqError("Please pass at least one modification",
3793
                                 errors.ECODE_INVAL)
3794
    if all_mods.count(True) > 1:
3795
      raise errors.OpPrereqError("Can't set the node into more than one"
3796
                                 " state at the same time",
3797
                                 errors.ECODE_INVAL)
3798

    
3799
    # Boolean value that tells us whether we're offlining or draining the node
3800
    self.offline_or_drain = (self.op.offline == True or
3801
                             self.op.drained == True)
3802
    self.deoffline_or_drain = (self.op.offline == False or
3803
                               self.op.drained == False)
3804
    self.might_demote = (self.op.master_candidate == False or
3805
                         self.offline_or_drain)
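    # If this change might demote the node and auto_promote was requested,
    # another node may have to be promoted in its place, which requires
    # locking all nodes.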
3806

    
3807
    self.lock_all = self.op.auto_promote and self.might_demote
3808

    
3809

    
3810
  def ExpandNames(self):
3811
    if self.lock_all:
3812
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3813
    else:
3814
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3815

    
3816
  def BuildHooksEnv(self):
3817
    """Build hooks env.
3818

3819
    This runs on the master node.
3820

3821
    """
3822
    env = {
3823
      "OP_TARGET": self.op.node_name,
3824
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3825
      "OFFLINE": str(self.op.offline),
3826
      "DRAINED": str(self.op.drained),
3827
      }
3828
    nl = [self.cfg.GetMasterNode(),
3829
          self.op.node_name]
3830
    return env, nl, nl
3831

    
3832
  def CheckPrereq(self):
3833
    """Check prerequisites.
3834

3835
    This only checks the instance list against the existing names.
3836

3837
    """
3838
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3839

    
3840
    if (self.op.master_candidate is not None or
3841
        self.op.drained is not None or
3842
        self.op.offline is not None):
3843
      # we can't change the master's node flags
3844
      if self.op.node_name == self.cfg.GetMasterNode():
3845
        raise errors.OpPrereqError("The master role can be changed"
3846
                                   " only via masterfailover",
3847
                                   errors.ECODE_INVAL)
3848

    
3849

    
3850
    if node.master_candidate and self.might_demote and not self.lock_all:
3851
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3852
      # check if after removing the current node, we're missing master
3853
      # candidates
3854
      (mc_remaining, mc_should, _) = \
3855
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3856
      if mc_remaining < mc_should:
3857
        raise errors.OpPrereqError("Not enough master candidates, please"
3858
                                   " pass auto_promote to allow promotion",
3859
                                   errors.ECODE_INVAL)
3860

    
3861
    if (self.op.master_candidate == True and
3862
        ((node.offline and not self.op.offline == False) or
3863
         (node.drained and not self.op.drained == False))):
3864
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3865
                                 " to master_candidate" % node.name,
3866
                                 errors.ECODE_INVAL)
3867

    
3868
    # If we're being deofflined/drained, we'll MC ourself if needed
3869
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3870
        self.op.master_candidate == True and not node.master_candidate):
3871
      self.op.master_candidate = _DecideSelfPromotion(self)
3872
      if self.op.master_candidate:
3873
        self.LogInfo("Autopromoting node to master candidate")
3874

    
3875
    return
3876

    
3877
  def Exec(self, feedback_fn):
3878
    """Modifies a node.
3879

3880
    """
3881
    node = self.node
3882

    
3883
    result = []
3884
    changed_mc = False
3885

    
3886
    if self.op.offline is not None:
3887
      node.offline = self.op.offline
3888
      result.append(("offline", str(self.op.offline)))
3889
      if self.op.offline == True:
3890
        if node.master_candidate:
3891
          node.master_candidate = False
3892
          changed_mc = True
3893
          result.append(("master_candidate", "auto-demotion due to offline"))
3894
        if node.drained:
3895
          node.drained = False
3896
          result.append(("drained", "clear drained status due to offline"))
3897

    
3898
    if self.op.master_candidate is not None:
3899
      node.master_candidate = self.op.master_candidate
3900
      changed_mc = True
3901
      result.append(("master_candidate", str(self.op.master_candidate)))
3902
      if self.op.master_candidate == False:
3903
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3904
        msg = rrc.fail_msg
3905
        if msg:
3906
          self.LogWarning("Node failed to demote itself: %s" % msg)
3907

    
3908
    if self.op.drained is not None:
3909
      node.drained = self.op.drained
3910
      result.append(("drained", str(self.op.drained)))
3911
      if self.op.drained == True:
3912
        if node.master_candidate:
3913
          node.master_candidate = False
3914
          changed_mc = True
3915
          result.append(("master_candidate", "auto-demotion due to drain"))
3916
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3917
          msg = rrc.fail_msg
3918
          if msg:
3919
            self.LogWarning("Node failed to demote itself: %s" % msg)
3920
        if node.offline:
3921
          node.offline = False
3922
          result.append(("offline", "clear offline status due to drain"))
3923

    
3924
    # we locked all nodes, we adjust the CP before updating this node
3925
    if self.lock_all:
3926
      _AdjustCandidatePool(self, [node.name])
3927

    
3928
    # this will trigger configuration file update, if needed
3929
    self.cfg.Update(node, feedback_fn)
3930

    
3931
    # this will trigger job queue propagation or cleanup
3932
    if changed_mc:
3933
      self.context.ReaddNode(node)
3934

    
3935
    return result
3936

    
3937

    
3938
class LUPowercycleNode(NoHooksLU):
3939
  """Powercycles a node.
3940

3941
  """
3942
  _OP_REQP = [
3943
    ("node_name", _TNEString),
3944
    ("force", _TBool),
3945
    ]
3946
  REQ_BGL = False
3947

    
3948
  def CheckArguments(self):
3949
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3950
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3951
      raise errors.OpPrereqError("The node is the master and the force"
3952
                                 " parameter was not set",
3953
                                 errors.ECODE_INVAL)
3954

    
3955
  def ExpandNames(self):
3956
    """Locking for PowercycleNode.
3957

3958
    This is a last-resort option and shouldn't block on other
3959
    jobs. Therefore, we grab no locks.
3960

3961
    """
3962
    self.needed_locks = {}
3963

    
3964
  def Exec(self, feedback_fn):
3965
    """Reboots a node.
3966

3967
    """
3968
    result = self.rpc.call_node_powercycle(self.op.node_name,
3969
                                           self.cfg.GetHypervisorType())
3970
    result.Raise("Failed to schedule the reboot")
3971
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
3975
  """Query cluster configuration.
3976

3977
  """
3978
  _OP_REQP = []
3979
  REQ_BGL = False
3980

    
3981
  def ExpandNames(self):
3982
    self.needed_locks = {}
3983

    
3984
  def Exec(self, feedback_fn):
3985
    """Return cluster config.
3986

3987
    """
3988
    cluster = self.cfg.GetClusterInfo()
3989
    os_hvp = {}
3990

    
3991
    # Filter just for enabled hypervisors
3992
    for os_name, hv_dict in cluster.os_hvp.items():
3993
      os_hvp[os_name] = {}
3994
      for hv_name, hv_params in hv_dict.items():
3995
        if hv_name in cluster.enabled_hypervisors:
3996
          os_hvp[os_name][hv_name] = hv_params
3997

    
3998
    result = {
3999
      "software_version": constants.RELEASE_VERSION,
4000
      "protocol_version": constants.PROTOCOL_VERSION,
4001
      "config_version": constants.CONFIG_VERSION,
4002
      "os_api_version": max(constants.OS_API_VERSIONS),
4003
      "export_version": constants.EXPORT_VERSION,
4004
      "architecture": (platform.architecture()[0], platform.machine()),
4005
      "name": cluster.cluster_name,
4006
      "master": cluster.master_node,
4007
      "default_hypervisor": cluster.enabled_hypervisors[0],
4008
      "enabled_hypervisors": cluster.enabled_hypervisors,
4009
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4010
                        for hypervisor_name in cluster.enabled_hypervisors]),
4011
      "os_hvp": os_hvp,
4012
      "beparams": cluster.beparams,
4013
      "osparams": cluster.osparams,
4014
      "nicparams": cluster.nicparams,
4015
      "candidate_pool_size": cluster.candidate_pool_size,
4016
      "master_netdev": cluster.master_netdev,
4017
      "volume_group_name": cluster.volume_group_name,
4018
      "file_storage_dir": cluster.file_storage_dir,
4019
      "maintain_node_health": cluster.maintain_node_health,
4020
      "ctime": cluster.ctime,
4021
      "mtime": cluster.mtime,
4022
      "uuid": cluster.uuid,
4023
      "tags": list(cluster.GetTags()),
4024
      "uid_pool": cluster.uid_pool,
4025
      }
4026

    
4027
    return result


class LUQueryConfigValues(NoHooksLU):
4031
  """Return configuration values.
4032

4033
  """
4034
  _OP_REQP = []
4035
  REQ_BGL = False
4036
  _FIELDS_DYNAMIC = utils.FieldSet()
4037
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4038
                                  "watcher_pause")
4039

    
4040
  def CheckArguments(self):
4041
    _CheckOutputFields(static=self._FIELDS_STATIC,
4042
                       dynamic=self._FIELDS_DYNAMIC,
4043
                       selected=self.op.output_fields)
4044

    
4045
  def ExpandNames(self):
4046
    self.needed_locks = {}
4047

    
4048
  def Exec(self, feedback_fn):
4049
    """Dump a representation of the cluster config to the standard output.
4050

4051
    """
4052
    values = []
4053
    for field in self.op.output_fields:
4054
      if field == "cluster_name":
4055
        entry = self.cfg.GetClusterName()
4056
      elif field == "master_node":
4057
        entry = self.cfg.GetMasterNode()
4058
      elif field == "drain_flag":
4059
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4060
      elif field == "watcher_pause":
4061
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4062
      else:
4063
        raise errors.ParameterError(field)
4064
      values.append(entry)
4065
    return values
4066

    
4067

    
4068
class LUActivateInstanceDisks(NoHooksLU):
4069
  """Bring up an instance's disks.
4070

4071
  """
4072
  _OP_REQP = [("instance_name", _TNEString)]
4073
  _OP_DEFS = [("ignore_size", False)]
4074
  REQ_BGL = False
4075

    
4076
  def ExpandNames(self):
4077
    self._ExpandAndLockInstance()
4078
    self.needed_locks[locking.LEVEL_NODE] = []
4079
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4080

    
4081
  def DeclareLocks(self, level):
4082
    if level == locking.LEVEL_NODE:
4083
      self._LockInstancesNodes()
4084

    
4085
  def CheckPrereq(self):
4086
    """Check prerequisites.
4087

4088
    This checks that the instance is in the cluster.
4089

4090
    """
4091
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4092
    assert self.instance is not None, \
4093
      "Cannot retrieve locked instance %s" % self.op.instance_name
4094
    _CheckNodeOnline(self, self.instance.primary_node)
4095

    
4096
  def Exec(self, feedback_fn):
4097
    """Activate the disks.
4098

4099
    """
4100
    disks_ok, disks_info = \
4101
              _AssembleInstanceDisks(self, self.instance,
4102
                                     ignore_size=self.op.ignore_size)
4103
    if not disks_ok:
4104
      raise errors.OpExecError("Cannot activate block devices")
4105

    
4106
    return disks_info
4107

    
4108

    
4109
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4110
                           ignore_size=False):
4111
  """Prepare the block devices for an instance.
4112

4113
  This sets up the block devices on all nodes.
4114

4115
  @type lu: L{LogicalUnit}
4116
  @param lu: the logical unit on whose behalf we execute
4117
  @type instance: L{objects.Instance}
4118
  @param instance: the instance for whose disks we assemble
4119
  @type disks: list of L{objects.Disk} or None
4120
  @param disks: which disks to assemble (or all, if None)
4121
  @type ignore_secondaries: boolean
4122
  @param ignore_secondaries: if true, errors on secondary nodes
4123
      won't result in an error return from the function
4124
  @type ignore_size: boolean
4125
  @param ignore_size: if true, the current known size of the disk
4126
      will not be used during the disk activation, useful for cases
4127
      when the size is wrong
4128
  @return: False if the operation failed, otherwise a list of
4129
      (host, instance_visible_name, node_visible_name)
4130
      with the mapping from node devices to instance devices
4131

4132
  """
4133
  device_info = []
4134
  disks_ok = True
4135
  iname = instance.name
4136
  disks = _ExpandCheckDisks(instance, disks)
4137

    
4138
  # With the two-pass mechanism we try to reduce the window of
4139
  # opportunity for the race condition of switching DRBD to primary
4140
  # before handshaking occurred, but we do not eliminate it
4141

    
4142
  # The proper fix would be to wait (with some limits) until the
4143
  # connection has been made and drbd transitions from WFConnection
4144
  # into any other network-connected state (Connected, SyncTarget,
4145
  # SyncSource, etc.)
4146

    
4147
  # 1st pass, assemble on all nodes in secondary mode
4148
  for inst_disk in disks:
4149
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4150
      if ignore_size:
4151
        node_disk = node_disk.Copy()
4152
        node_disk.UnsetSize()
4153
      lu.cfg.SetDiskID(node_disk, node)
4154
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4155
      msg = result.fail_msg
4156
      if msg:
4157
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4158
                           " (is_primary=False, pass=1): %s",
4159
                           inst_disk.iv_name, node, msg)
4160
        if not ignore_secondaries:
4161
          disks_ok = False
4162

    
4163
  # FIXME: race condition on drbd migration to primary
4164

    
4165
  # 2nd pass, do only the primary node
4166
  for inst_disk in disks:
4167
    dev_path = None
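    # On the primary node the assemble RPC returns the local device path in
    # its payload; it is collected into device_info for the caller.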
4168

    
4169
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4170
      if node != instance.primary_node:
4171
        continue
4172
      if ignore_size:
4173
        node_disk = node_disk.Copy()
4174
        node_disk.UnsetSize()
4175
      lu.cfg.SetDiskID(node_disk, node)
4176
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4177
      msg = result.fail_msg
4178
      if msg:
4179
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4180
                           " (is_primary=True, pass=2): %s",
4181
                           inst_disk.iv_name, node, msg)
4182
        disks_ok = False
4183
      else:
4184
        dev_path = result.payload
4185

    
4186
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4187

    
4188
  # leave the disks configured for the primary node
4189
  # this is a workaround that would be fixed better by
4190
  # improving the logical/physical id handling
4191
  for disk in disks:
4192
    lu.cfg.SetDiskID(disk, instance.primary_node)
4193

    
4194
  return disks_ok, device_info
4195

    
4196

    
4197
def _StartInstanceDisks(lu, instance, force):
4198
  """Start the disks of an instance.
4199

4200
  """
4201
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4202
                                           ignore_secondaries=force)
4203
  if not disks_ok:
4204
    _ShutdownInstanceDisks(lu, instance)
4205
    if force is not None and not force:
4206
      lu.proc.LogWarning("", hint="If the message above refers to a"
4207
                         " secondary node,"
4208
                         " you can retry the operation using '--force'.")
4209
    raise errors.OpExecError("Disk consistency error")
4210

    
4211

    
4212
class LUDeactivateInstanceDisks(NoHooksLU):
4213
  """Shutdown an instance's disks.
4214

4215
  """
4216
  _OP_REQP = [("instance_name", _TNEString)]
4217
  REQ_BGL = False
4218

    
4219
  def ExpandNames(self):
4220
    self._ExpandAndLockInstance()
4221
    self.needed_locks[locking.LEVEL_NODE] = []
4222
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4223

    
4224
  def DeclareLocks(self, level):
4225
    if level == locking.LEVEL_NODE:
4226
      self._LockInstancesNodes()
4227

    
4228
  def CheckPrereq(self):
4229
    """Check prerequisites.
4230

4231
    This checks that the instance is in the cluster.
4232

4233
    """
4234
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4235
    assert self.instance is not None, \
4236
      "Cannot retrieve locked instance %s" % self.op.instance_name
4237

    
4238
  def Exec(self, feedback_fn):
4239
    """Deactivate the disks
4240

4241
    """
4242
    instance = self.instance
4243
    _SafeShutdownInstanceDisks(self, instance)
4244

    
4245

    
4246
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4247
  """Shutdown block devices of an instance.
4248

4249
  This function checks that the instance is not running before
  calling _ShutdownInstanceDisks.
4251

4252
  """
4253
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4254
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4255

    
4256

    
4257
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4276
  """Shutdown block devices of an instance.
4277

4278
  This does the shutdown on all nodes of the instance.
4279

4280
  If ignore_primary is true, errors on the primary node are ignored
  when computing the return value.
4282

4283
  """
4284
  all_result = True
4285
  disks = _ExpandCheckDisks(instance, disks)
4286

    
4287
  for disk in disks:
4288
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4289
      lu.cfg.SetDiskID(top_disk, node)
4290
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4291
      msg = result.fail_msg
4292
      if msg:
4293
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4294
                      disk.iv_name, node, msg)
4295
        if not ignore_primary or node != instance.primary_node:
4296
          all_result = False
4297
  return all_result
4298

    
4299

    
4300
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4301
  """Checks if a node has enough free memory.
4302

4303
  This function checks if a given node has the needed amount of free
4304
  memory. In case the node has less memory or we cannot get the
4305
  information from the node, this function raises an OpPrereqError
4306
  exception.
4307

4308
  @type lu: C{LogicalUnit}
4309
  @param lu: a logical unit from which we get configuration data
4310
  @type node: C{str}
4311
  @param node: the node to check
4312
  @type reason: C{str}
4313
  @param reason: string to use in the error message
4314
  @type requested: C{int}
4315
  @param requested: the amount of memory in MiB to check for
4316
  @type hypervisor_name: C{str}
4317
  @param hypervisor_name: the hypervisor to ask for memory stats
4318
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4319
      we cannot check the node
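
  Example (illustrative only; C{bep} stands for the instance's filled
  beparams)::

    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MEMORY], instance.hypervisor)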
4320

4321
  """
4322
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4323
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4324
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4325
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4326
  if not isinstance(free_mem, int):
4327
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4328
                               " was '%s'" % (node, free_mem),
4329
                               errors.ECODE_ENVIRON)
4330
  if requested > free_mem:
4331
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4332
                               " needed %s MiB, available %s MiB" %
4333
                               (node, reason, requested, free_mem),
4334
                               errors.ECODE_NORES)
4335

    
4336

    
4337
def _CheckNodesFreeDisk(lu, nodenames, requested):
4338
  """Checks if nodes have enough free disk space in the default VG.
4339

4340
  This function checks if all given nodes have the needed amount of
4341
  free disk. In case any node has less disk or we cannot get the
4342
  information from the node, this function raise an OpPrereqError
4343
  exception.
4344

4345
  @type lu: C{LogicalUnit}
4346
  @param lu: a logical unit from which we get configuration data
4347
  @type nodenames: C{list}
4348
  @param nodenames: the list of node names to check
4349
  @type requested: C{int}
4350
  @param requested: the amount of disk in MiB to check for
4351
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4352
      we cannot check the node
4353

4354
  """
4355
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4356
                                   lu.cfg.GetHypervisorType())
4357
  for node in nodenames:
4358
    info = nodeinfo[node]
4359
    info.Raise("Cannot get current information from node %s" % node,
4360
               prereq=True, ecode=errors.ECODE_ENVIRON)
4361
    vg_free = info.payload.get("vg_free", None)
4362
    if not isinstance(vg_free, int):
4363
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4364
                                 " result was '%s'" % (node, vg_free),
4365
                                 errors.ECODE_ENVIRON)
4366
    if requested > vg_free:
4367
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4368
                                 " required %d MiB, available %d MiB" %
4369
                                 (node, requested, vg_free),
4370
                                 errors.ECODE_NORES)
4371

    
4372

    
4373
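# Illustrative sketch only, not part of the original module: a hypothetical
# CheckPrereq-style helper combining the two resource checks above.  All
# parameters (primary node name, node list, MiB amounts) are assumed inputs.
def _ExampleCheckCreationPrereqs(lu, pnode, nodenames, mem_mib, disk_mib):
  """Verify free memory on the primary node and free VG space on all nodes.

  """
  # memory is only needed on the node that will actually run the instance
  _CheckNodeFreeMemory(lu, pnode, "creating an instance", mem_mib,
                       lu.cfg.GetHypervisorType())
  # disk space must be available on every node that will hold a disk
  _CheckNodesFreeDisk(lu, nodenames, disk_mib)

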
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("force", _TBool),
    ("beparams", _TDict),
    ("hvparams", _TDict),
    ]
  _OP_DEFS = [
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


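# Illustrative note, not part of the original module: the LUs in this file
# declare their opcode parameters declaratively.  Each _OP_REQP entry pairs a
# required opcode attribute with a type-check callable (_TBool, _TDict, the
# non-empty-string check _TNEString, ...), while _OP_DEFS lists defaults that
# are filled in for optional attributes.  A minimal hypothetical declaration
# would look like:
#
#   _OP_REQP = [
#     ("instance_name", _TNEString),   # required, non-empty string
#     ("force", _TBool),               # required boolean
#     ]
#   _OP_DEFS = [
#     ("beparams", _EmptyDict),        # optional, defaults to an empty dict
#     ]

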
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_secondaries", _TBool),
    ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [
    ("os_type", None),
    ("force_variant", False),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


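# Illustrative sketch only, not part of the original module: the
# activate/try/finally pattern used by LUReinstallInstance above (and by the
# rename path further down) factored into a hypothetical helper.  ``fn`` is an
# assumed callable executed while the instance's disks are active.
def _ExampleRunWithActiveDisks(lu, instance, fn):
  """Run a callable with the instance's disks activated, then clean up.

  """
  _StartInstanceDisks(lu, instance, None)
  try:
    return fn(instance)
  finally:
    # always deactivate the disks, even if the callable raised
    _ShutdownInstanceDisks(lu, instance)

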
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("disks", _TListOf(_TPInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("new_name", _TNEString),
    ]
  _OP_DEFS = [("ignore_ip", False)]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not self.op.ignore_ip:
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_failures", _TBool),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


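# Illustrative note, not part of the original module: the BuildHooksEnv
# methods above all follow the same convention -- they return a triple of the
# hook environment dict, the nodes running the pre-hooks and the nodes running
# the post-hooks (LURemoveInstance is an example where the two node lists
# differ).  A minimal sketch of the convention:
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
#     return env, nl, nl

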
class LUQueryInstances(NoHooksLU):
4923
  """Logical unit for querying instances.
4924

4925
  """
4926
  # pylint: disable-msg=W0142
4927
  _OP_REQP = [
4928
    ("output_fields", _TListOf(_TNEString)),
4929
    ("names", _TListOf(_TNEString)),
4930
    ("use_locking", _TBool),
4931
    ]
4932
  REQ_BGL = False
4933
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4934
                    "serial_no", "ctime", "mtime", "uuid"]
4935
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4936
                                    "admin_state",
4937
                                    "disk_template", "ip", "mac", "bridge",
4938
                                    "nic_mode", "nic_link",
4939
                                    "sda_size", "sdb_size", "vcpus", "tags",
4940
                                    "network_port", "beparams",
4941
                                    r"(disk)\.(size)/([0-9]+)",
4942
                                    r"(disk)\.(sizes)", "disk_usage",
4943
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4944
                                    r"(nic)\.(bridge)/([0-9]+)",
4945
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4946
                                    r"(disk|nic)\.(count)",
4947
                                    "hvparams",
4948
                                    ] + _SIMPLE_FIELDS +
4949
                                  ["hv/%s" % name
4950
                                   for name in constants.HVS_PARAMETERS
4951
                                   if name not in constants.HVC_GLOBALS] +
4952
                                  ["be/%s" % name
4953
                                   for name in constants.BES_PARAMETERS])
4954
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4955

    
4956

    
4957
  def CheckArguments(self):
4958
    _CheckOutputFields(static=self._FIELDS_STATIC,
4959
                       dynamic=self._FIELDS_DYNAMIC,
4960
                       selected=self.op.output_fields)
4961

    
4962
  def ExpandNames(self):
4963
    self.needed_locks = {}
4964
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4965
    self.share_locks[locking.LEVEL_NODE] = 1
4966

    
4967
    if self.op.names:
4968
      self.wanted = _GetWantedInstances(self, self.op.names)
4969
    else:
4970
      self.wanted = locking.ALL_SET
4971

    
4972
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4973
    self.do_locking = self.do_node_query and self.op.use_locking
4974
    if self.do_locking:
4975
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4976
      self.needed_locks[locking.LEVEL_NODE] = []
4977
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4978

    
4979
  def DeclareLocks(self, level):
4980
    if level == locking.LEVEL_NODE and self.do_locking:
4981
      self._LockInstancesNodes()
4982

    
4983
  def Exec(self, feedback_fn):
4984
    """Computes the list of nodes and their attributes.
4985

4986
    """
4987
    # pylint: disable-msg=R0912
4988
    # way too many branches here
4989
    all_info = self.cfg.GetAllInstancesInfo()
4990
    if self.wanted == locking.ALL_SET:
4991
      # caller didn't specify instance names, so ordering is not important
4992
      if self.do_locking:
4993
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4994
      else:
4995
        instance_names = all_info.keys()
4996
      instance_names = utils.NiceSort(instance_names)
4997
    else:
4998
      # caller did specify names, so we must keep the ordering
4999
      if self.do_locking:
5000
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5001
      else:
5002
        tgt_set = all_info.keys()
5003
      missing = set(self.wanted).difference(tgt_set)
5004
      if missing:
5005
        raise errors.OpExecError("Some instances were removed before"
5006
                                 " retrieving their data: %s" % missing)
5007
      instance_names = self.wanted
5008

    
5009
    instance_list = [all_info[iname] for iname in instance_names]
5010

    
5011
    # begin data gathering
5012

    
5013
    nodes = frozenset([inst.primary_node for inst in instance_list])
5014
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5015

    
5016
    bad_nodes = []
5017
    off_nodes = []
5018
    if self.do_node_query:
5019
      live_data = {}
5020
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5021
      for name in nodes:
5022
        result = node_data[name]
5023
        if result.offline:
5024
          # offline nodes will be in both lists
5025
          off_nodes.append(name)
5026
        if result.fail_msg:
5027
          bad_nodes.append(name)
5028
        else:
5029
          if result.payload:
5030
            live_data.update(result.payload)
5031
          # else no instance is alive
5032
    else:
5033
      live_data = dict([(name, {}) for name in instance_names])
5034

    
5035
    # end data gathering
5036

    
5037
    HVPREFIX = "hv/"
5038
    BEPREFIX = "be/"
5039
    output = []
5040
    cluster = self.cfg.GetClusterInfo()
5041
    for instance in instance_list:
5042
      iout = []
5043
      i_hv = cluster.FillHV(instance, skip_globals=True)
5044
      i_be = cluster.FillBE(instance)
5045
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5046
      for field in self.op.output_fields:
5047
        st_match = self._FIELDS_STATIC.Matches(field)
5048
        if field in self._SIMPLE_FIELDS:
5049
          val = getattr(instance, field)
5050
        elif field == "pnode":
5051
          val = instance.primary_node
5052
        elif field == "snodes":
5053
          val = list(instance.secondary_nodes)
5054
        elif field == "admin_state":
5055
          val = instance.admin_up
5056
        elif field == "oper_state":
5057
          if instance.primary_node in bad_nodes:
5058
            val = None
5059
          else:
5060
            val = bool(live_data.get(instance.name))
5061
        elif field == "status":
5062
          if instance.primary_node in off_nodes:
5063
            val = "ERROR_nodeoffline"
5064
          elif instance.primary_node in bad_nodes:
5065
            val = "ERROR_nodedown"
5066
          else:
5067
            running = bool(live_data.get(instance.name))
5068
            if running:
5069
              if instance.admin_up:
5070
                val = "running"
5071
              else:
5072
                val = "ERROR_up"
5073
            else:
5074
              if instance.admin_up:
5075
                val = "ERROR_down"
5076
              else:
5077
                val = "ADMIN_down"
5078
        elif field == "oper_ram":
5079
          if instance.primary_node in bad_nodes:
5080
            val = None
5081
          elif instance.name in live_data:
5082
            val = live_data[instance.name].get("memory", "?")
5083
          else:
5084
            val = "-"
5085
        elif field == "vcpus":
5086
          val = i_be[constants.BE_VCPUS]
5087
        elif field == "disk_template":
5088
          val = instance.disk_template
5089
        elif field == "ip":
5090
          if instance.nics:
5091
            val = instance.nics[0].ip
5092
          else:
5093
            val = None
5094
        elif field == "nic_mode":
5095
          if instance.nics:
5096
            val = i_nicp[0][constants.NIC_MODE]
5097
          else:
5098
            val = None
5099
        elif field == "nic_link":
5100
          if instance.nics:
5101
            val = i_nicp[0][constants.NIC_LINK]
5102
          else:
5103
            val = None
5104
        elif field == "bridge":
5105
          if (instance.nics and
5106
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5107
            val = i_nicp[0][constants.NIC_LINK]
5108
          else:
5109
            val = None
5110
        elif field == "mac":
5111
          if instance.nics:
5112
            val = instance.nics[0].mac
5113
          else:
5114
            val = None
5115
        elif field == "sda_size" or field == "sdb_size":
5116
          idx = ord(field[2]) - ord('a')
5117
          try:
5118
            val = instance.FindDisk(idx).size
5119
          except errors.OpPrereqError:
5120
            val = None
5121
        elif field == "disk_usage": # total disk usage per node
5122
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5123
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5124
        elif field == "tags":
5125
          val = list(instance.GetTags())
5126
        elif field == "hvparams":
5127
          val = i_hv
5128
        elif (field.startswith(HVPREFIX) and
5129
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5130
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5131
          val = i_hv.get(field[len(HVPREFIX):], None)
5132
        elif field == "beparams":
5133
          val = i_be
5134
        elif (field.startswith(BEPREFIX) and
5135
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5136
          val = i_be.get(field[len(BEPREFIX):], None)
5137
        elif st_match and st_match.groups():
5138
          # matches a variable list
5139
          st_groups = st_match.groups()
5140
          if st_groups and st_groups[0] == "disk":
5141
            if st_groups[1] == "count":
5142
              val = len(instance.disks)
5143
            elif st_groups[1] == "sizes":
5144
              val = [disk.size for disk in instance.disks]
5145
            elif st_groups[1] == "size":
5146
              try:
5147
                val = instance.FindDisk(st_groups[2]).size
5148
              except errors.OpPrereqError:
5149
                val = None
5150
            else:
5151
              assert False, "Unhandled disk parameter"
5152
          elif st_groups[0] == "nic":
5153
            if st_groups[1] == "count":
5154
              val = len(instance.nics)
5155
            elif st_groups[1] == "macs":
5156
              val = [nic.mac for nic in instance.nics]
5157
            elif st_groups[1] == "ips":
5158
              val = [nic.ip for nic in instance.nics]
5159
            elif st_groups[1] == "modes":
5160
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5161
            elif st_groups[1] == "links":
5162
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5163
            elif st_groups[1] == "bridges":
5164
              val = []
5165
              for nicp in i_nicp:
5166
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5167
                  val.append(nicp[constants.NIC_LINK])
5168
                else:
5169
                  val.append(None)
5170
            else:
5171
              # index-based item
5172
              nic_idx = int(st_groups[2])
5173
              if nic_idx >= len(instance.nics):
5174
                val = None
5175
              else:
5176
                if st_groups[1] == "mac":
5177
                  val = instance.nics[nic_idx].mac
5178
                elif st_groups[1] == "ip":
5179
                  val = instance.nics[nic_idx].ip
5180
                elif st_groups[1] == "mode":
5181
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5182
                elif st_groups[1] == "link":
5183
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5184
                elif st_groups[1] == "bridge":
5185
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5186
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5187
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5188
                  else:
5189
                    val = None
5190
                else:
5191
                  assert False, "Unhandled NIC parameter"
5192
          else:
5193
            assert False, ("Declared but unhandled variable parameter '%s'" %
5194
                           field)
5195
        else:
5196
          assert False, "Declared but unhandled parameter '%s'" % field
5197
        iout.append(val)
5198
      output.append(iout)
5199

    
5200
    return output
5201

    
5202

    
5203
class LUFailoverInstance(LogicalUnit):
5204
  """Failover an instance.
5205

5206
  """
5207
  HPATH = "instance-failover"
5208
  HTYPE = constants.HTYPE_INSTANCE
5209
  _OP_REQP = [
5210
    ("instance_name", _TNEString),
5211
    ("ignore_consistency", _TBool),
5212
    ]
5213
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5214
  REQ_BGL = False
5215

    
5216
  def ExpandNames(self):
5217
    self._ExpandAndLockInstance()
5218
    self.needed_locks[locking.LEVEL_NODE] = []
5219
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5220

    
5221
  def DeclareLocks(self, level):
5222
    if level == locking.LEVEL_NODE:
5223
      self._LockInstancesNodes()
5224

    
5225
  def BuildHooksEnv(self):
5226
    """Build hooks env.
5227

5228
    This runs on master, primary and secondary nodes of the instance.
5229

5230
    """
5231
    instance = self.instance
5232
    source_node = instance.primary_node
5233
    target_node = instance.secondary_nodes[0]
5234
    env = {
5235
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5236
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5237
      "OLD_PRIMARY": source_node,
5238
      "OLD_SECONDARY": target_node,
5239
      "NEW_PRIMARY": target_node,
5240
      "NEW_SECONDARY": source_node,
5241
      }
5242
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5243
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5244
    nl_post = list(nl)
5245
    nl_post.append(source_node)
5246
    return env, nl, nl_post
5247

    
5248
  def CheckPrereq(self):
5249
    """Check prerequisites.
5250

5251
    This checks that the instance is in the cluster.
5252

5253
    """
5254
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5255
    assert self.instance is not None, \
5256
      "Cannot retrieve locked instance %s" % self.op.instance_name
5257

    
5258
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5259
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5260
      raise errors.OpPrereqError("Instance's disk layout is not"
5261
                                 " network mirrored, cannot failover.",
5262
                                 errors.ECODE_STATE)
5263

    
5264
    secondary_nodes = instance.secondary_nodes
5265
    if not secondary_nodes:
5266
      raise errors.ProgrammerError("no secondary node but using "
5267
                                   "a mirrored disk template")
5268

    
5269
    target_node = secondary_nodes[0]
5270
    _CheckNodeOnline(self, target_node)
5271
    _CheckNodeNotDrained(self, target_node)
5272
    if instance.admin_up:
5273
      # check memory requirements on the secondary node
5274
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5275
                           instance.name, bep[constants.BE_MEMORY],
5276
                           instance.hypervisor)
5277
    else:
5278
      self.LogInfo("Not checking memory on the secondary node as"
5279
                   " instance will not be started")
5280

    
5281
    # check bridge existence
5282
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5283

    
5284
  def Exec(self, feedback_fn):
5285
    """Failover an instance.
5286

5287
    The failover is done by shutting it down on its present node and
5288
    starting it on the secondary.
5289

5290
    """
5291
    instance = self.instance
5292

    
5293
    source_node = instance.primary_node
5294
    target_node = instance.secondary_nodes[0]
5295

    
5296
    if instance.admin_up:
5297
      feedback_fn("* checking disk consistency between source and target")
5298
      for dev in instance.disks:
5299
        # for drbd, these are drbd over lvm
5300
        if not _CheckDiskConsistency(self, dev, target_node, False):
5301
          if not self.op.ignore_consistency:
5302
            raise errors.OpExecError("Disk %s is degraded on target node,"
5303
                                     " aborting failover." % dev.iv_name)
5304
    else:
5305
      feedback_fn("* not checking disk consistency as instance is not running")
5306

    
5307
    feedback_fn("* shutting down instance on source node")
5308
    logging.info("Shutting down instance %s on node %s",
5309
                 instance.name, source_node)
5310

    
5311
    result = self.rpc.call_instance_shutdown(source_node, instance,
5312
                                             self.op.shutdown_timeout)
5313
    msg = result.fail_msg
5314
    if msg:
5315
      if self.op.ignore_consistency:
5316
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5317
                             " Proceeding anyway. Please make sure node"
5318
                             " %s is down. Error details: %s",
5319
                             instance.name, source_node, source_node, msg)
5320
      else:
5321
        raise errors.OpExecError("Could not shutdown instance %s on"
5322
                                 " node %s: %s" %
5323
                                 (instance.name, source_node, msg))
5324

    
5325
    feedback_fn("* deactivating the instance's disks on source node")
5326
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5327
      raise errors.OpExecError("Can't shut down the instance's disks.")
5328

    
5329
    instance.primary_node = target_node
5330
    # distribute new instance config to the other nodes
5331
    self.cfg.Update(instance, feedback_fn)
5332

    
5333
    # Only start the instance if it's marked as up
5334
    if instance.admin_up:
5335
      feedback_fn("* activating the instance's disks on target node")
5336
      logging.info("Starting instance %s on node %s",
5337
                   instance.name, target_node)
5338

    
5339
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5340
                                           ignore_secondaries=True)
5341
      if not disks_ok:
5342
        _ShutdownInstanceDisks(self, instance)
5343
        raise errors.OpExecError("Can't activate the instance's disks")
5344

    
5345
      feedback_fn("* starting the instance on the target node")
5346
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5347
      msg = result.fail_msg
5348
      if msg:
5349
        _ShutdownInstanceDisks(self, instance)
5350
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5351
                                 (instance.name, target_node, msg))
5352

    
5353

    
5354
class LUMigrateInstance(LogicalUnit):
5355
  """Migrate an instance.
5356

5357
  This is migration without shutting down, compared to the failover,
5358
  which is done with shutdown.
5359

5360
  """
5361
  HPATH = "instance-migrate"
5362
  HTYPE = constants.HTYPE_INSTANCE
5363
  _OP_REQP = [
5364
    ("instance_name", _TNEString),
5365
    ("live", _TBool),
5366
    ("cleanup", _TBool),
5367
    ]
5368

    
5369
  REQ_BGL = False
5370

    
5371
  def ExpandNames(self):
5372
    self._ExpandAndLockInstance()
5373

    
5374
    self.needed_locks[locking.LEVEL_NODE] = []
5375
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5376

    
5377
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5378
                                       self.op.live, self.op.cleanup)
5379
    self.tasklets = [self._migrater]
5380

    
5381
  def DeclareLocks(self, level):
5382
    if level == locking.LEVEL_NODE:
5383
      self._LockInstancesNodes()
5384

    
5385
  def BuildHooksEnv(self):
5386
    """Build hooks env.
5387

5388
    This runs on master, primary and secondary nodes of the instance.
5389

5390
    """
5391
    instance = self._migrater.instance
5392
    source_node = instance.primary_node
5393
    target_node = instance.secondary_nodes[0]
5394
    env = _BuildInstanceHookEnvByObject(self, instance)
5395
    env["MIGRATE_LIVE"] = self.op.live
5396
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5397
    env.update({
5398
        "OLD_PRIMARY": source_node,
5399
        "OLD_SECONDARY": target_node,
5400
        "NEW_PRIMARY": target_node,
5401
        "NEW_SECONDARY": source_node,
5402
        })
5403
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5404
    nl_post = list(nl)
5405
    nl_post.append(source_node)
5406
    return env, nl, nl_post
5407

    
5408

    
5409
class LUMoveInstance(LogicalUnit):
5410
  """Move an instance by data-copying.
5411

5412
  """
5413
  HPATH = "instance-move"
5414
  HTYPE = constants.HTYPE_INSTANCE
5415
  _OP_REQP = [
5416
    ("instance_name", _TNEString),
5417
    ("target_node", _TNEString),
5418
    ]
5419
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5420
  REQ_BGL = False
5421

    
5422
  def ExpandNames(self):
5423
    self._ExpandAndLockInstance()
5424
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5425
    self.op.target_node = target_node
5426
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5427
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5428

    
5429
  def DeclareLocks(self, level):
5430
    if level == locking.LEVEL_NODE:
5431
      self._LockInstancesNodes(primary_only=True)
5432

    
5433
  def BuildHooksEnv(self):
5434
    """Build hooks env.
5435

5436
    This runs on master, primary and secondary nodes of the instance.
5437

5438
    """
5439
    env = {
5440
      "TARGET_NODE": self.op.target_node,
5441
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5442
      }
5443
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5444
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5445
                                       self.op.target_node]
5446
    return env, nl, nl
5447

    
5448
  def CheckPrereq(self):
5449
    """Check prerequisites.
5450

5451
    This checks that the instance is in the cluster.
5452

5453
    """
5454
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5455
    assert self.instance is not None, \
5456
      "Cannot retrieve locked instance %s" % self.op.instance_name
5457

    
5458
    node = self.cfg.GetNodeInfo(self.op.target_node)
5459
    assert node is not None, \
5460
      "Cannot retrieve locked node %s" % self.op.target_node
5461

    
5462
    self.target_node = target_node = node.name
5463

    
5464
    if target_node == instance.primary_node:
5465
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5466
                                 (instance.name, target_node),
5467
                                 errors.ECODE_STATE)
5468

    
5469
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5470

    
5471
    for idx, dsk in enumerate(instance.disks):
5472
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5473
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5474
                                   " cannot copy" % idx, errors.ECODE_STATE)
5475

    
5476
    _CheckNodeOnline(self, target_node)
5477
    _CheckNodeNotDrained(self, target_node)
5478

    
5479
    if instance.admin_up:
5480
      # check memory requirements on the secondary node
5481
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5482
                           instance.name, bep[constants.BE_MEMORY],
5483
                           instance.hypervisor)
5484
    else:
5485
      self.LogInfo("Not checking memory on the secondary node as"
5486
                   " instance will not be started")
5487

    
5488
    # check bridge existence
5489
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5490

    
5491
  def Exec(self, feedback_fn):
5492
    """Move an instance.
5493

5494
    The move is done by shutting it down on its present node, copying
5495
    the data over (slow) and starting it on the new node.
5496

5497
    """
5498
    instance = self.instance
5499

    
5500
    source_node = instance.primary_node
5501
    target_node = self.target_node
5502

    
5503
    self.LogInfo("Shutting down instance %s on source node %s",
5504
                 instance.name, source_node)
5505

    
5506
    result = self.rpc.call_instance_shutdown(source_node, instance,
5507
                                             self.op.shutdown_timeout)
5508
    msg = result.fail_msg
5509
    if msg:
5510
      if self.op.ignore_consistency:
5511
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5512
                             " Proceeding anyway. Please make sure node"
5513
                             " %s is down. Error details: %s",
5514
                             instance.name, source_node, source_node, msg)
5515
      else:
5516
        raise errors.OpExecError("Could not shutdown instance %s on"
5517
                                 " node %s: %s" %
5518
                                 (instance.name, source_node, msg))
5519

    
5520
    # create the target disks
5521
    try:
5522
      _CreateDisks(self, instance, target_node=target_node)
5523
    except errors.OpExecError:
5524
      self.LogWarning("Device creation failed, reverting...")
5525
      try:
5526
        _RemoveDisks(self, instance, target_node=target_node)
5527
      finally:
5528
        self.cfg.ReleaseDRBDMinors(instance.name)
5529
        raise
5530

    
5531
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5532

    
5533
    errs = []
5534
    # activate, get path, copy the data over
5535
    for idx, disk in enumerate(instance.disks):
5536
      self.LogInfo("Copying data for disk %d", idx)
5537
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5538
                                               instance.name, True)
5539
      if result.fail_msg:
5540
        self.LogWarning("Can't assemble newly created disk %d: %s",
5541
                        idx, result.fail_msg)
5542
        errs.append(result.fail_msg)
5543
        break
5544
      dev_path = result.payload
5545
      result = self.rpc.call_blockdev_export(source_node, disk,
5546
                                             target_node, dev_path,
5547
                                             cluster_name)
5548
      if result.fail_msg:
5549
        self.LogWarning("Can't copy data over for disk %d: %s",
5550
                        idx, result.fail_msg)
5551
        errs.append(result.fail_msg)
5552
        break
5553

    
5554
    if errs:
5555
      self.LogWarning("Some disks failed to copy, aborting")
5556
      try:
5557
        _RemoveDisks(self, instance, target_node=target_node)
5558
      finally:
5559
        self.cfg.ReleaseDRBDMinors(instance.name)
5560
        raise errors.OpExecError("Errors during disk copy: %s" %
5561
                                 (",".join(errs),))
5562

    
5563
    instance.primary_node = target_node
5564
    self.cfg.Update(instance, feedback_fn)
5565

    
5566
    self.LogInfo("Removing the disks on the original node")
5567
    _RemoveDisks(self, instance, target_node=source_node)
5568

    
5569
    # Only start the instance if it's marked as up
5570
    if instance.admin_up:
5571
      self.LogInfo("Starting instance %s on node %s",
5572
                   instance.name, target_node)
5573

    
5574
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5575
                                           ignore_secondaries=True)
5576
      if not disks_ok:
5577
        _ShutdownInstanceDisks(self, instance)
5578
        raise errors.OpExecError("Can't activate the instance's disks")
5579

    
5580
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5581
      msg = result.fail_msg
5582
      if msg:
5583
        _ShutdownInstanceDisks(self, instance)
5584
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5585
                                 (instance.name, target_node, msg))
5586

    
5587

    
5588
class LUMigrateNode(LogicalUnit):
5589
  """Migrate all instances from a node.
5590

5591
  """
5592
  HPATH = "node-migrate"
5593
  HTYPE = constants.HTYPE_NODE
5594
  _OP_REQP = [
5595
    ("node_name", _TNEString),
5596
    ("live", _TBool),
5597
    ]
5598
  REQ_BGL = False
5599

    
5600
  def ExpandNames(self):
5601
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5602

    
5603
    self.needed_locks = {
5604
      locking.LEVEL_NODE: [self.op.node_name],
5605
      }
5606

    
5607
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5608

    
5609
    # Create tasklets for migrating instances for all instances on this node
5610
    names = []
5611
    tasklets = []
5612

    
5613
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5614
      logging.debug("Migrating instance %s", inst.name)
5615
      names.append(inst.name)
5616

    
5617
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5618

    
5619
    self.tasklets = tasklets
5620

    
5621
    # Declare instance locks
5622
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5623

    
5624
  def DeclareLocks(self, level):
5625
    if level == locking.LEVEL_NODE:
5626
      self._LockInstancesNodes()
5627

    
5628
  def BuildHooksEnv(self):
5629
    """Build hooks env.
5630

5631
    This runs on the master, the primary and all the secondaries.
5632

5633
    """
5634
    env = {
5635
      "NODE_NAME": self.op.node_name,
5636
      }
5637

    
5638
    nl = [self.cfg.GetMasterNode()]
5639

    
5640
    return (env, nl, nl)
5641

    
5642

    
5643
class TLMigrateInstance(Tasklet):
5644
  def __init__(self, lu, instance_name, live, cleanup):
5645
    """Initializes this class.
5646

5647
    """
5648
    Tasklet.__init__(self, lu)
5649

    
5650
    # Parameters
5651
    self.instance_name = instance_name
5652
    self.live = live
5653
    self.cleanup = cleanup
5654

    
5655
  def CheckPrereq(self):
5656
    """Check prerequisites.
5657

5658
    This checks that the instance is in the cluster.
5659

5660
    """
5661
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5662
    instance = self.cfg.GetInstanceInfo(instance_name)
5663
    assert instance is not None
5664

    
5665
    if instance.disk_template != constants.DT_DRBD8:
5666
      raise errors.OpPrereqError("Instance's disk layout is not"
5667
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5668

    
5669
    secondary_nodes = instance.secondary_nodes
5670
    if not secondary_nodes:
5671
      raise errors.ConfigurationError("No secondary node but using"
5672
                                      " drbd8 disk template")
5673

    
5674
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5675

    
5676
    target_node = secondary_nodes[0]
5677
    # check memory requirements on the secondary node
5678
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5679
                         instance.name, i_be[constants.BE_MEMORY],
5680
                         instance.hypervisor)
5681

    
5682
    # check bridge existence
5683
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5684

    
5685
    if not self.cleanup:
5686
      _CheckNodeNotDrained(self.lu, target_node)
5687
      result = self.rpc.call_instance_migratable(instance.primary_node,
5688
                                                 instance)
5689
      result.Raise("Can't migrate, please use failover",
5690
                   prereq=True, ecode=errors.ECODE_STATE)
5691

    
5692
    self.instance = instance
5693

    
5694
  def _WaitUntilSync(self):
5695
    """Poll with custom rpc for disk sync.
5696

5697
    This uses our own step-based rpc call.
5698

5699
    """
5700
    self.feedback_fn("* wait until resync is done")
5701
    all_done = False
5702
    while not all_done:
5703
      all_done = True
5704
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5705
                                            self.nodes_ip,
5706
                                            self.instance.disks)
5707
      min_percent = 100
5708
      for node, nres in result.items():
5709
        nres.Raise("Cannot resync disks on node %s" % node)
5710
        node_done, node_percent = nres.payload
5711
        all_done = all_done and node_done
5712
        if node_percent is not None:
5713
          min_percent = min(min_percent, node_percent)
5714
      if not all_done:
5715
        if min_percent < 100:
5716
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5717
        time.sleep(2)
5718

    
5719
  def _EnsureSecondary(self, node):
5720
    """Demote a node to secondary.
5721

5722
    """
5723
    self.feedback_fn("* switching node %s to secondary mode" % node)
5724

    
5725
    for dev in self.instance.disks:
5726
      self.cfg.SetDiskID(dev, node)
5727

    
5728
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5729
                                          self.instance.disks)
5730
    result.Raise("Cannot change disk to secondary on node %s" % node)
5731

    
5732
  def _GoStandalone(self):
5733
    """Disconnect from the network.
5734

5735
    """
5736
    self.feedback_fn("* changing into standalone mode")
5737
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5738
                                               self.instance.disks)
5739
    for node, nres in result.items():
5740
      nres.Raise("Cannot disconnect disks node %s" % node)
5741

    
5742
  def _GoReconnect(self, multimaster):
5743
    """Reconnect to the network.
5744

5745
    """
5746
    if multimaster:
5747
      msg = "dual-master"
5748
    else:
5749
      msg = "single-master"
5750
    self.feedback_fn("* changing disks into %s mode" % msg)
5751
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5752
                                           self.instance.disks,
5753
                                           self.instance.name, multimaster)
5754
    for node, nres in result.items():
5755
      nres.Raise("Cannot change disks config on node %s" % node)
5756

    
5757
  def _ExecCleanup(self):
5758
    """Try to cleanup after a failed migration.
5759

5760
    The cleanup is done by:
5761
      - check that the instance is running only on one node
5762
        (and update the config if needed)
5763
      - change disks on its secondary node to secondary
5764
      - wait until disks are fully synchronized
5765
      - disconnect from the network
5766
      - change disks into single-master mode
5767
      - wait again until disks are fully synchronized
5768

5769
    """
5770
    instance = self.instance
5771
    target_node = self.target_node
5772
    source_node = self.source_node
5773

    
5774
    # check running on only one node
5775
    self.feedback_fn("* checking where the instance actually runs"
5776
                     " (if this hangs, the hypervisor might be in"
5777
                     " a bad state)")
5778
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5779
    for node, result in ins_l.items():
5780
      result.Raise("Can't contact node %s" % node)
5781

    
5782
    runningon_source = instance.name in ins_l[source_node].payload
5783
    runningon_target = instance.name in ins_l[target_node].payload
5784

    
5785
    if runningon_source and runningon_target:
5786
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5787
                               " or the hypervisor is confused. You will have"
5788
                               " to ensure manually that it runs only on one"
5789
                               " and restart this operation.")
5790

    
5791
    if not (runningon_source or runningon_target):
5792
      raise errors.OpExecError("Instance does not seem to be running at all."
5793
                               " In this case, it's safer to repair by"
5794
                               " running 'gnt-instance stop' to ensure disk"
5795
                               " shutdown, and then restarting it.")
5796

    
5797
    if runningon_target:
5798
      # the migration has actually succeeded, we need to update the config
5799
      self.feedback_fn("* instance running on secondary node (%s),"
5800
                       " updating config" % target_node)
5801
      instance.primary_node = target_node
5802
      self.cfg.Update(instance, self.feedback_fn)
5803
      demoted_node = source_node
5804
    else:
5805
      self.feedback_fn("* instance confirmed to be running on its"
5806
                       " primary node (%s)" % source_node)
5807
      demoted_node = target_node
5808

    
5809
    self._EnsureSecondary(demoted_node)
5810
    try:
5811
      self._WaitUntilSync()
5812
    except errors.OpExecError:
5813
      # we ignore errors here, since if the device is standalone, it
5814
      # won't be able to sync
5815
      pass
5816
    self._GoStandalone()
5817
    self._GoReconnect(False)
5818
    self._WaitUntilSync()
5819

    
5820
    self.feedback_fn("* done")
5821

    
5822
  def _RevertDiskStatus(self):
5823
    """Try to revert the disk status after a failed migration.
5824

5825
    """
5826
    target_node = self.target_node
5827
    try:
5828
      self._EnsureSecondary(target_node)
5829
      self._GoStandalone()
5830
      self._GoReconnect(False)
5831
      self._WaitUntilSync()
5832
    except errors.OpExecError, err:
5833
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5834
                         " drives: error '%s'\n"
5835
                         "Please look and recover the instance status" %
5836
                         str(err))
5837

    
5838
  def _AbortMigration(self):
5839
    """Call the hypervisor code to abort a started migration.
5840

5841
    """
5842
    instance = self.instance
5843
    target_node = self.target_node
5844
    migration_info = self.migration_info
5845

    
5846
    abort_result = self.rpc.call_finalize_migration(target_node,
5847
                                                    instance,
5848
                                                    migration_info,
5849
                                                    False)
5850
    abort_msg = abort_result.fail_msg
5851
    if abort_msg:
5852
      logging.error("Aborting migration failed on target node %s: %s",
5853
                    target_node, abort_msg)
5854
      # Don't raise an exception here, as we still have to try to revert the
5855
      # disk status, even if this step failed.
5856

    
5857
  def _ExecMigration(self):
5858
    """Migrate an instance.
5859

5860
    The migrate is done by:
5861
      - change the disks into dual-master mode
5862
      - wait until disks are fully synchronized again
5863
      - migrate the instance
5864
      - change disks on the new secondary node (the old primary) to secondary
5865
      - wait until disks are fully synchronized
5866
      - change disks into single-master mode
5867

5868
    """
5869
    instance = self.instance
5870
    target_node = self.target_node
5871
    source_node = self.source_node
5872

    
5873
    self.feedback_fn("* checking disk consistency between source and target")
5874
    for dev in instance.disks:
5875
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5876
        raise errors.OpExecError("Disk %s is degraded or not fully"
5877
                                 " synchronized on target node,"
5878
                                 " aborting migrate." % dev.iv_name)
5879

    
5880
    # First get the migration information from the remote node
5881
    result = self.rpc.call_migration_info(source_node, instance)
5882
    msg = result.fail_msg
5883
    if msg:
5884
      log_err = ("Failed fetching source migration information from %s: %s" %
5885
                 (source_node, msg))
5886
      logging.error(log_err)
5887
      raise errors.OpExecError(log_err)
5888

    
5889
    self.migration_info = migration_info = result.payload
5890

    
5891
    # Then switch the disks to master/master mode
5892
    self._EnsureSecondary(target_node)
5893
    self._GoStandalone()
5894
    self._GoReconnect(True)
5895
    self._WaitUntilSync()
5896

    
5897
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5898
    result = self.rpc.call_accept_instance(target_node,
5899
                                           instance,
5900
                                           migration_info,
5901
                                           self.nodes_ip[target_node])
5902

    
5903
    msg = result.fail_msg
5904
    if msg:
5905
      logging.error("Instance pre-migration failed, trying to revert"
5906
                    " disk status: %s", msg)
5907
      self.feedback_fn("Pre-migration failed, aborting")
5908
      self._AbortMigration()
5909
      self._RevertDiskStatus()
5910
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5911
                               (instance.name, msg))
5912

    
5913
    self.feedback_fn("* migrating instance to %s" % target_node)
5914
    time.sleep(10)
5915
    result = self.rpc.call_instance_migrate(source_node, instance,
5916
                                            self.nodes_ip[target_node],
5917
                                            self.live)
5918
    msg = result.fail_msg
5919
    if msg:
5920
      logging.error("Instance migration failed, trying to revert"
5921
                    " disk status: %s", msg)
5922
      self.feedback_fn("Migration failed, aborting")
5923
      self._AbortMigration()
5924
      self._RevertDiskStatus()
5925
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5926
                               (instance.name, msg))
5927
    time.sleep(10)
5928

    
5929
    instance.primary_node = target_node
5930
    # distribute new instance config to the other nodes
5931
    self.cfg.Update(instance, self.feedback_fn)
5932

    
5933
    result = self.rpc.call_finalize_migration(target_node,
5934
                                              instance,
5935
                                              migration_info,
5936
                                              True)
5937
    msg = result.fail_msg
5938
    if msg:
5939
      logging.error("Instance migration succeeded, but finalization failed:"
5940
                    " %s", msg)
5941
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5942
                               msg)
5943

    
5944
    self._EnsureSecondary(source_node)
5945
    self._WaitUntilSync()
5946
    self._GoStandalone()
5947
    self._GoReconnect(False)
5948
    self._WaitUntilSync()
5949

    
5950
    self.feedback_fn("* done")
5951

    
5952
  def Exec(self, feedback_fn):
5953
    """Perform the migration.
5954

5955
    """
5956
    feedback_fn("Migrating instance %s" % self.instance.name)
5957

    
5958
    self.feedback_fn = feedback_fn
5959

    
5960
    self.source_node = self.instance.primary_node
5961
    self.target_node = self.instance.secondary_nodes[0]
5962
    self.all_nodes = [self.source_node, self.target_node]
5963
    self.nodes_ip = {
5964
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5965
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5966
      }
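    # nodes_ip maps each node to its secondary (replication network)
    # address, which is what the DRBD and migration RPCs expect.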

    
5968
    if self.cleanup:
5969
      return self._ExecCleanup()
5970
    else:
5971
      return self._ExecMigration()
5972

    
5973

    
5974
def _CreateBlockDev(lu, node, instance, device, force_create,
5975
                    info, force_open):
5976
  """Create a tree of block devices on a given node.
5977

5978
  If this device type has to be created on secondaries, create it and
5979
  all its children.
5980

5981
  If not, just recurse to children keeping the same 'force' value.
5982

5983
  @param lu: the lu on whose behalf we execute
5984
  @param node: the node on which to create the device
5985
  @type instance: L{objects.Instance}
5986
  @param instance: the instance which owns the device
5987
  @type device: L{objects.Disk}
5988
  @param device: the device to create
5989
  @type force_create: boolean
5990
  @param force_create: whether to force creation of this device; this
5991
      will be changed to True whenever we find a device which has the
5992
      CreateOnSecondary() attribute
5993
  @param info: the extra 'metadata' we should attach to the device
5994
      (this will be represented as a LVM tag)
5995
  @type force_open: boolean
5996
  @param force_open: this parameter will be passed to the
5997
      L{backend.BlockdevCreate} function where it specifies
5998
      whether we run on primary or not, and it affects both
5999
      the child assembly and the device's own Open() execution
6000

6001
  """
6002
  if device.CreateOnSecondary():
6003
    force_create = True
6004

    
6005
  if device.children:
6006
    for child in device.children:
6007
      _CreateBlockDev(lu, node, instance, child, force_create,
6008
                      info, force_open)
6009

    
6010
  if not force_create:
6011
    return
6012

    
6013
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6014

    
6015

    
6016
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6017
  """Create a single block device on a given node.
6018

6019
  This will not recurse over children of the device, so they must be
6020
  created in advance.
6021

6022
  @param lu: the lu on whose behalf we execute
6023
  @param node: the node on which to create the device
6024
  @type instance: L{objects.Instance}
6025
  @param instance: the instance which owns the device
6026
  @type device: L{objects.Disk}
6027
  @param device: the device to create
6028
  @param info: the extra 'metadata' we should attach to the device
6029
      (this will be represented as a LVM tag)
6030
  @type force_open: boolean
6031
  @param force_open: this parameter will be passed to the
6032
      L{backend.BlockdevCreate} function where it specifies
6033
      whether we run on primary or not, and it affects both
6034
      the child assembly and the device's own Open() execution
6035

6036
  """
6037
  lu.cfg.SetDiskID(device, node)
6038
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6039
                                       instance.name, force_open, info)
6040
  result.Raise("Can't create block device %s on"
6041
               " node %s for instance %s" % (device, node, instance.name))
6042
  if device.physical_id is None:
6043
    device.physical_id = result.payload
6044

    
6045

    
6046
def _GenerateUniqueNames(lu, exts):
6047
  """Generate a suitable LV name.
6048

6049
  This will generate a set of unique logical volume names, one per extension.
6050

6051
  """
6052
  results = []
6053
  for val in exts:
6054
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6055
    results.append("%s%s" % (new_id, val))
6056
  return results
6057

    
6058

    
6059
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6060
                         p_minor, s_minor):
6061
  """Generate a drbd8 device complete with its children.
6062

6063
  """
6064
  port = lu.cfg.AllocatePort()
6065
  vgname = lu.cfg.GetVGName()
6066
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
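  # The DRBD8 device is a small tree: a data LV and a 128 MB metadata LV
  # as children, with the DRBD device on top referencing both nodes, the
  # allocated port, the per-node minors and the shared secret.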
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6068
                          logical_id=(vgname, names[0]))
6069
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6070
                          logical_id=(vgname, names[1]))
6071
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6072
                          logical_id=(primary, secondary, port,
6073
                                      p_minor, s_minor,
6074
                                      shared_secret),
6075
                          children=[dev_data, dev_meta],
6076
                          iv_name=iv_name)
6077
  return drbd_dev
6078

    
6079

    
6080
def _GenerateDiskTemplate(lu, template_name,
6081
                          instance_name, primary_node,
6082
                          secondary_nodes, disk_info,
6083
                          file_storage_dir, file_driver,
6084
                          base_index):
6085
  """Generate the entire disk layout for a given template type.
6086

6087
  """
6088
  # TODO: compute space requirements
6089

    
6090
  vgname = lu.cfg.GetVGName()
6091
  disk_count = len(disk_info)
6092
  disks = []
6093
  if template_name == constants.DT_DISKLESS:
6094
    pass
6095
  elif template_name == constants.DT_PLAIN:
6096
    if len(secondary_nodes) != 0:
6097
      raise errors.ProgrammerError("Wrong template configuration")
6098

    
6099
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6100
                                      for i in range(disk_count)])
6101
    for idx, disk in enumerate(disk_info):
6102
      disk_index = idx + base_index
6103
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6104
                              logical_id=(vgname, names[idx]),
6105
                              iv_name="disk/%d" % disk_index,
6106
                              mode=disk["mode"])
6107
      disks.append(disk_dev)
6108
  elif template_name == constants.DT_DRBD8:
6109
    if len(secondary_nodes) != 1:
6110
      raise errors.ProgrammerError("Wrong template configuration")
6111
    remote_node = secondary_nodes[0]
6112
    minors = lu.cfg.AllocateDRBDMinor(
6113
      [primary_node, remote_node] * len(disk_info), instance_name)
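    # Every DRBD8 disk consumes one minor per node and two backing LVs
    # (<prefix>_data and <prefix>_meta), so minors and names are used in
    # pairs below.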
    names = []
6116
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6117
                                               for i in range(disk_count)]):
6118
      names.append(lv_prefix + "_data")
6119
      names.append(lv_prefix + "_meta")
6120
    for idx, disk in enumerate(disk_info):
6121
      disk_index = idx + base_index
6122
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6123
                                      disk["size"], names[idx*2:idx*2+2],
6124
                                      "disk/%d" % disk_index,
6125
                                      minors[idx*2], minors[idx*2+1])
6126
      disk_dev.mode = disk["mode"]
6127
      disks.append(disk_dev)
6128
  elif template_name == constants.DT_FILE:
6129
    if len(secondary_nodes) != 0:
6130
      raise errors.ProgrammerError("Wrong template configuration")
6131

    
6132
    _RequireFileStorage()
6133

    
6134
    for idx, disk in enumerate(disk_info):
6135
      disk_index = idx + base_index
6136
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6137
                              iv_name="disk/%d" % disk_index,
6138
                              logical_id=(file_driver,
6139
                                          "%s/disk%d" % (file_storage_dir,
6140
                                                         disk_index)),
6141
                              mode=disk["mode"])
6142
      disks.append(disk_dev)
6143
  else:
6144
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6145
  return disks
6146

    
6147

    
6148
def _GetInstanceInfoText(instance):
6149
  """Compute that text that should be added to the disk's metadata.
6150

6151
  """
  return "originstname+%s" % instance.name
6153

    
6154

    
6155
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6156
  """Create all disks for an instance.
6157

6158
  This abstracts away some work from AddInstance.
6159

6160
  @type lu: L{LogicalUnit}
6161
  @param lu: the logical unit on whose behalf we execute
6162
  @type instance: L{objects.Instance}
6163
  @param instance: the instance whose disks we should create
6164
  @type to_skip: list
6165
  @param to_skip: list of indices to skip
6166
  @type target_node: string
6167
  @param target_node: if passed, overrides the target node for creation
6168
  @rtype: boolean
6169
  @return: the success of the creation
6170

6171
  """
6172
  info = _GetInstanceInfoText(instance)
6173
  if target_node is None:
6174
    pnode = instance.primary_node
6175
    all_nodes = instance.all_nodes
6176
  else:
6177
    pnode = target_node
6178
    all_nodes = [pnode]
6179

    
6180
  if instance.disk_template == constants.DT_FILE:
6181
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6182
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6183

    
6184
    result.Raise("Failed to create directory '%s' on"
6185
                 " node %s" % (file_storage_dir, pnode))
6186

    
6187
  # Note: this needs to be kept in sync with adding of disks in
6188
  # LUSetInstanceParams
6189
  for idx, device in enumerate(instance.disks):
6190
    if to_skip and idx in to_skip:
6191
      continue
6192
    logging.info("Creating volume %s for instance %s",
6193
                 device.iv_name, instance.name)
6194
    #HARDCODE
6195
    for node in all_nodes:
6196
      f_create = node == pnode
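      # force creation/opening only on the primary node; on the other
      # nodes _CreateBlockDev creates only the devices (and their
      # children) that request CreateOnSecondary()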
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6198

    
6199

    
6200
def _RemoveDisks(lu, instance, target_node=None):
6201
  """Remove all disks for an instance.
6202

6203
  This abstracts away some work from `AddInstance()` and
6204
  `RemoveInstance()`. Note that in case some of the devices couldn't
6205
  be removed, the removal will continue with the other ones (compare
6206
  with `_CreateDisks()`).
6207

6208
  @type lu: L{LogicalUnit}
6209
  @param lu: the logical unit on whose behalf we execute
6210
  @type instance: L{objects.Instance}
6211
  @param instance: the instance whose disks we should remove
6212
  @type target_node: string
6213
  @param target_node: used to override the node on which to remove the disks
6214
  @rtype: boolean
6215
  @return: the success of the removal
6216

6217
  """
6218
  logging.info("Removing block devices for instance %s", instance.name)
6219

    
6220
  all_result = True
6221
  for device in instance.disks:
6222
    if target_node:
6223
      edata = [(target_node, device)]
6224
    else:
6225
      edata = device.ComputeNodeTree(instance.primary_node)
6226
    for node, disk in edata:
6227
      lu.cfg.SetDiskID(disk, node)
6228
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6229
      if msg:
6230
        lu.LogWarning("Could not remove block device %s on node %s,"
6231
                      " continuing anyway: %s", device.iv_name, node, msg)
6232
        all_result = False
6233

    
6234
  if instance.disk_template == constants.DT_FILE:
6235
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6236
    if target_node:
6237
      tgt = target_node
6238
    else:
6239
      tgt = instance.primary_node
6240
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6241
    if result.fail_msg:
6242
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6243
                    file_storage_dir, instance.primary_node, result.fail_msg)
6244
      all_result = False
6245

    
6246
  return all_result
6247

    
6248

    
6249
def _ComputeDiskSize(disk_template, disks):
6250
  """Compute disk size requirements in the volume group
6251

6252
  """
6253
  # Required free disk space as a function of disk and swap space
6254
  req_size_dict = {
6255
    constants.DT_DISKLESS: None,
6256
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6257
    # 128 MB are added for drbd metadata for each disk
6258
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6259
    constants.DT_FILE: None,
6260
  }
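  # Example: a DRBD8 instance with 1024 MB and 2048 MB disks needs
  # (1024 + 128) + (2048 + 128) = 3328 MB of free space in the volume
  # group of each node.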
  if disk_template not in req_size_dict:
6263
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6264
                                 " is unknown" %  disk_template)
6265

    
6266
  return req_size_dict[disk_template]
6267

    
6268

    
6269
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6270
  """Hypervisor parameter validation.
6271

6272
  This function abstracts the hypervisor parameter validation to be
6273
  used in both instance create and instance modify.
6274

6275
  @type lu: L{LogicalUnit}
6276
  @param lu: the logical unit for which we check
6277
  @type nodenames: list
6278
  @param nodenames: the list of nodes on which we should check
6279
  @type hvname: string
6280
  @param hvname: the name of the hypervisor we should use
6281
  @type hvparams: dict
6282
  @param hvparams: the parameters which we need to check
6283
  @raise errors.OpPrereqError: if the parameters are not valid
6284

6285
  """
6286
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6287
                                                  hvname,
6288
                                                  hvparams)
6289
  for node in nodenames:
6290
    info = hvinfo[node]
6291
    if info.offline:
6292
      continue
6293
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6294

    
6295

    
6296
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6297
  """OS parameters validation.
6298

6299
  @type lu: L{LogicalUnit}
6300
  @param lu: the logical unit for which we check
6301
  @type required: boolean
6302
  @param required: whether the validation should fail if the OS is not
6303
      found
6304
  @type nodenames: list
6305
  @param nodenames: the list of nodes on which we should check
6306
  @type osname: string
6307
  @param osname: the name of the OS we should use
6308
  @type osparams: dict
6309
  @param osparams: the parameters which we need to check
6310
  @raise errors.OpPrereqError: if the parameters are not valid
6311

6312
  """
6313
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6314
                                   [constants.OS_VALIDATE_PARAMETERS],
6315
                                   osparams)
6316
  for node, nres in result.items():
6317
    # we don't check for offline cases since this should be run only
6318
    # against the master node and/or an instance's nodes
6319
    nres.Raise("OS Parameters validation failed on node %s" % node)
6320
    if not nres.payload:
6321
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6322
                 osname, node)
6323

    
6324

    
6325
class LUCreateInstance(LogicalUnit):
6326
  """Create an instance.
6327

6328
  """
6329
  HPATH = "instance-add"
6330
  HTYPE = constants.HTYPE_INSTANCE
6331
  _OP_REQP = [
6332
    ("instance_name", _TNEString),
6333
    ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6334
    ("start", _TBool),
6335
    ("wait_for_sync", _TBool),
6336
    ("ip_check", _TBool),
6337
    ("disks", _TListOf(_TDict)),
6338
    ("nics", _TListOf(_TDict)),
6339
    ("hvparams", _TDict),
6340
    ("beparams", _TDict),
6341
    ("osparams", _TDict),
6342
    ]
6343
  _OP_DEFS = [
6344
    ("name_check", True),
6345
    ("no_install", False),
6346
    ("os_type", None),
6347
    ("force_variant", False),
6348
    ("source_handshake", None),
6349
    ("source_x509_ca", None),
6350
    ("source_instance_name", None),
6351
    ("src_node", None),
6352
    ("src_path", None),
6353
    ("pnode", None),
6354
    ("snode", None),
6355
    ("iallocator", None),
6356
    ("hypervisor", None),
6357
    ("disk_template", None),
6358
    ("identify_defaults", None),
6359
    ]
6360
  REQ_BGL = False
6361

    
6362
  def CheckArguments(self):
6363
    """Check arguments.
6364

6365
    """
6366
    # do not require name_check to ease forward/backward compatibility
6367
    # for tools
6368
    if self.op.no_install and self.op.start:
6369
      self.LogInfo("No-installation mode selected, disabling startup")
6370
      self.op.start = False
6371
    # validate/normalize the instance name
6372
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6373
    if self.op.ip_check and not self.op.name_check:
6374
      # TODO: make the ip check more flexible and not depend on the name check
6375
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6376
                                 errors.ECODE_INVAL)
6377

    
6378
    # check nics' parameter names
6379
    for nic in self.op.nics:
6380
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6381

    
6382
    # check disks. parameter names and consistent adopt/no-adopt strategy
6383
    has_adopt = has_no_adopt = False
6384
    for disk in self.op.disks:
6385
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6386
      if "adopt" in disk:
6387
        has_adopt = True
6388
      else:
6389
        has_no_adopt = True
6390
    if has_adopt and has_no_adopt:
6391
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6392
                                 errors.ECODE_INVAL)
6393
    if has_adopt:
6394
      if self.op.disk_template != constants.DT_PLAIN:
6395
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6396
                                   " 'plain' disk template",
6397
                                   errors.ECODE_INVAL)
6398
      if self.op.iallocator is not None:
6399
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6400
                                   " iallocator script", errors.ECODE_INVAL)
6401
      if self.op.mode == constants.INSTANCE_IMPORT:
6402
        raise errors.OpPrereqError("Disk adoption not allowed for"
6403
                                   " instance import", errors.ECODE_INVAL)
6404

    
6405
    self.adopt_disks = has_adopt
6406

    
6407
    # instance name verification
6408
    if self.op.name_check:
6409
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6410
      self.op.instance_name = self.hostname1.name
6411
      # used in CheckPrereq for ip ping check
6412
      self.check_ip = self.hostname1.ip
6413
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6414
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6415
                                 errors.ECODE_INVAL)
6416
    else:
6417
      self.check_ip = None
6418

    
6419
    # file storage checks
6420
    if (self.op.file_driver and
6421
        not self.op.file_driver in constants.FILE_DRIVER):
6422
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6423
                                 self.op.file_driver, errors.ECODE_INVAL)
6424

    
6425
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6426
      raise errors.OpPrereqError("File storage directory path not absolute",
6427
                                 errors.ECODE_INVAL)
6428

    
6429
    ### Node/iallocator related checks
6430
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6431
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6432
                                 " node must be given",
6433
                                 errors.ECODE_INVAL)
6434

    
6435
    self._cds = _GetClusterDomainSecret()
6436

    
6437
    if self.op.mode == constants.INSTANCE_IMPORT:
6438
      # On import force_variant must be True, because if we forced it at
6439
      # initial install, our only chance when importing it back is that it
6440
      # works again!
6441
      self.op.force_variant = True
6442

    
6443
      if self.op.no_install:
6444
        self.LogInfo("No-installation mode has no effect during import")
6445

    
6446
    elif self.op.mode == constants.INSTANCE_CREATE:
6447
      if self.op.os_type is None:
6448
        raise errors.OpPrereqError("No guest OS specified",
6449
                                   errors.ECODE_INVAL)
6450
      if self.op.disk_template is None:
6451
        raise errors.OpPrereqError("No disk template specified",
6452
                                   errors.ECODE_INVAL)
6453

    
6454
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6455
      # Check handshake to ensure both clusters have the same domain secret
6456
      src_handshake = self.op.source_handshake
6457
      if not src_handshake:
6458
        raise errors.OpPrereqError("Missing source handshake",
6459
                                   errors.ECODE_INVAL)
6460

    
6461
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6462
                                                           src_handshake)
6463
      if errmsg:
6464
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6465
                                   errors.ECODE_INVAL)
6466

    
6467
      # Load and check source CA
6468
      self.source_x509_ca_pem = self.op.source_x509_ca
6469
      if not self.source_x509_ca_pem:
6470
        raise errors.OpPrereqError("Missing source X509 CA",
6471
                                   errors.ECODE_INVAL)
6472

    
6473
      try:
6474
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6475
                                                    self._cds)
6476
      except OpenSSL.crypto.Error, err:
6477
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6478
                                   (err, ), errors.ECODE_INVAL)
6479

    
6480
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6481
      if errcode is not None:
6482
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6483
                                   errors.ECODE_INVAL)
6484

    
6485
      self.source_x509_ca = cert
6486

    
6487
      src_instance_name = self.op.source_instance_name
6488
      if not src_instance_name:
6489
        raise errors.OpPrereqError("Missing source instance name",
6490
                                   errors.ECODE_INVAL)
6491

    
6492
      self.source_instance_name = \
6493
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6494

    
6495
    else:
6496
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6497
                                 self.op.mode, errors.ECODE_INVAL)
6498

    
6499
  def ExpandNames(self):
6500
    """ExpandNames for CreateInstance.
6501

6502
    Figure out the right locks for instance creation.
6503

6504
    """
6505
    self.needed_locks = {}
6506

    
6507
    instance_name = self.op.instance_name
6508
    # this is just a preventive check, but someone might still add this
6509
    # instance in the meantime, and creation will fail at lock-add time
6510
    if instance_name in self.cfg.GetInstanceList():
6511
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6512
                                 instance_name, errors.ECODE_EXISTS)
6513

    
6514
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6515

    
6516
    if self.op.iallocator:
6517
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6518
    else:
6519
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6520
      nodelist = [self.op.pnode]
6521
      if self.op.snode is not None:
6522
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6523
        nodelist.append(self.op.snode)
6524
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6525

    
6526
    # in case of import lock the source node too
6527
    if self.op.mode == constants.INSTANCE_IMPORT:
6528
      src_node = self.op.src_node
6529
      src_path = self.op.src_path
6530

    
6531
      if src_path is None:
6532
        self.op.src_path = src_path = self.op.instance_name
6533

    
6534
      if src_node is None:
6535
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6536
        self.op.src_node = None
6537
        if os.path.isabs(src_path):
6538
          raise errors.OpPrereqError("Importing an instance from an absolute"
6539
                                     " path requires a source node option.",
6540
                                     errors.ECODE_INVAL)
6541
      else:
6542
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6543
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6544
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6545
        if not os.path.isabs(src_path):
6546
          self.op.src_path = src_path = \
6547
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6548

    
6549
  def _RunAllocator(self):
6550
    """Run the allocator based on input opcode.
6551

6552
    """
6553
    nics = [n.ToDict() for n in self.nics]
6554
    ial = IAllocator(self.cfg, self.rpc,
6555
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6556
                     name=self.op.instance_name,
6557
                     disk_template=self.op.disk_template,
6558
                     tags=[],
6559
                     os=self.op.os_type,
6560
                     vcpus=self.be_full[constants.BE_VCPUS],
6561
                     mem_size=self.be_full[constants.BE_MEMORY],
6562
                     disks=self.disks,
6563
                     nics=nics,
6564
                     hypervisor=self.op.hypervisor,
6565
                     )
6566

    
6567
    ial.Run(self.op.iallocator)
6568

    
6569
    if not ial.success:
6570
      raise errors.OpPrereqError("Can't compute nodes using"
6571
                                 " iallocator '%s': %s" %
6572
                                 (self.op.iallocator, ial.info),
6573
                                 errors.ECODE_NORES)
6574
    if len(ial.result) != ial.required_nodes:
6575
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6576
                                 " of nodes (%s), required %s" %
6577
                                 (self.op.iallocator, len(ial.result),
6578
                                  ial.required_nodes), errors.ECODE_FAULT)
6579
    self.op.pnode = ial.result[0]
6580
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6581
                 self.op.instance_name, self.op.iallocator,
6582
                 utils.CommaJoin(ial.result))
6583
    if ial.required_nodes == 2:
6584
      self.op.snode = ial.result[1]
6585

    
6586
  def BuildHooksEnv(self):
6587
    """Build hooks env.
6588

6589
    This runs on master, primary and secondary nodes of the instance.
6590

6591
    """
6592
    env = {
6593
      "ADD_MODE": self.op.mode,
6594
      }
6595
    if self.op.mode == constants.INSTANCE_IMPORT:
6596
      env["SRC_NODE"] = self.op.src_node
6597
      env["SRC_PATH"] = self.op.src_path
6598
      env["SRC_IMAGES"] = self.src_images
6599

    
6600
    env.update(_BuildInstanceHookEnv(
6601
      name=self.op.instance_name,
6602
      primary_node=self.op.pnode,
6603
      secondary_nodes=self.secondaries,
6604
      status=self.op.start,
6605
      os_type=self.op.os_type,
6606
      memory=self.be_full[constants.BE_MEMORY],
6607
      vcpus=self.be_full[constants.BE_VCPUS],
6608
      nics=_NICListToTuple(self, self.nics),
6609
      disk_template=self.op.disk_template,
6610
      disks=[(d["size"], d["mode"]) for d in self.disks],
6611
      bep=self.be_full,
6612
      hvp=self.hv_full,
6613
      hypervisor_name=self.op.hypervisor,
6614
    ))
6615

    
6616
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6617
          self.secondaries)
6618
    return env, nl, nl
6619

    
6620
  def _ReadExportInfo(self):
6621
    """Reads the export information from disk.
6622

6623
    It will override the opcode source node and path with the actual
6624
    information, if these two were not specified before.
6625

6626
    @return: the export information
6627

6628
    """
6629
    assert self.op.mode == constants.INSTANCE_IMPORT
6630

    
6631
    src_node = self.op.src_node
6632
    src_path = self.op.src_path
6633

    
6634
    if src_node is None:
6635
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6636
      exp_list = self.rpc.call_export_list(locked_nodes)
6637
      found = False
6638
      for node in exp_list:
6639
        if exp_list[node].fail_msg:
6640
          continue
6641
        if src_path in exp_list[node].payload:
6642
          found = True
6643
          self.op.src_node = src_node = node
6644
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6645
                                                       src_path)
6646
          break
6647
      if not found:
6648
        raise errors.OpPrereqError("No export found for relative path %s" %
6649
                                    src_path, errors.ECODE_INVAL)
6650

    
6651
    _CheckNodeOnline(self, src_node)
6652
    result = self.rpc.call_export_info(src_node, src_path)
6653
    result.Raise("No export or invalid export found in dir %s" % src_path)
6654

    
6655
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6656
    if not export_info.has_section(constants.INISECT_EXP):
6657
      raise errors.ProgrammerError("Corrupted export config",
6658
                                   errors.ECODE_ENVIRON)
6659

    
6660
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6661
    if (int(ei_version) != constants.EXPORT_VERSION):
6662
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6663
                                 (ei_version, constants.EXPORT_VERSION),
6664
                                 errors.ECODE_ENVIRON)
6665
    return export_info
6666

    
6667
  def _ReadExportParams(self, einfo):
6668
    """Use export parameters as defaults.
6669

6670
    In case the opcode doesn't specify (as in override) some instance
6671
    parameters, then try to use them from the export information, if
6672
    that declares them.
6673

6674
    """
6675
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6676

    
6677
    if self.op.disk_template is None:
6678
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6679
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6680
                                          "disk_template")
6681
      else:
6682
        raise errors.OpPrereqError("No disk template specified and the export"
6683
                                   " is missing the disk_template information",
6684
                                   errors.ECODE_INVAL)
6685

    
6686
    if not self.op.disks:
6687
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6688
        disks = []
6689
        # TODO: import the disk iv_name too
6690
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6691
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6692
          disks.append({"size": disk_sz})
6693
        self.op.disks = disks
6694
      else:
6695
        raise errors.OpPrereqError("No disk info specified and the export"
6696
                                   " is missing the disk information",
6697
                                   errors.ECODE_INVAL)
6698

    
6699
    if (not self.op.nics and
6700
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6701
      nics = []
6702
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6703
        ndict = {}
6704
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6705
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6706
          ndict[name] = v
6707
        nics.append(ndict)
6708
      self.op.nics = nics
6709

    
6710
    if (self.op.hypervisor is None and
6711
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6712
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6713
    if einfo.has_section(constants.INISECT_HYP):
6714
      # use the export parameters but do not override the ones
6715
      # specified by the user
6716
      for name, value in einfo.items(constants.INISECT_HYP):
6717
        if name not in self.op.hvparams:
6718
          self.op.hvparams[name] = value
6719

    
6720
    if einfo.has_section(constants.INISECT_BEP):
6721
      # use the parameters, without overriding
6722
      for name, value in einfo.items(constants.INISECT_BEP):
6723
        if name not in self.op.beparams:
6724
          self.op.beparams[name] = value
6725
    else:
6726
      # try to read the parameters old style, from the main section
6727
      for name in constants.BES_PARAMETERS:
6728
        if (name not in self.op.beparams and
6729
            einfo.has_option(constants.INISECT_INS, name)):
6730
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6731

    
6732
    if einfo.has_section(constants.INISECT_OSP):
6733
      # use the parameters, without overriding
6734
      for name, value in einfo.items(constants.INISECT_OSP):
6735
        if name not in self.op.osparams:
6736
          self.op.osparams[name] = value
6737

    
6738
  def _RevertToDefaults(self, cluster):
6739
    """Revert the instance parameters to the default values.
6740

6741
    """
    # hvparams
6743
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6744
    for name in self.op.hvparams.keys():
6745
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6746
        del self.op.hvparams[name]
6747
    # beparams
6748
    be_defs = cluster.SimpleFillBE({})
6749
    for name in self.op.beparams.keys():
6750
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6751
        del self.op.beparams[name]
6752
    # nic params
6753
    nic_defs = cluster.SimpleFillNIC({})
6754
    for nic in self.op.nics:
6755
      for name in constants.NICS_PARAMETERS:
6756
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6757
          del nic[name]
6758
    # osparams
6759
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6760
    for name in self.op.osparams.keys():
6761
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6762
        del self.op.osparams[name]
6763

    
6764
  def CheckPrereq(self):
6765
    """Check prerequisites.
6766

6767
    """
6768
    if self.op.mode == constants.INSTANCE_IMPORT:
6769
      export_info = self._ReadExportInfo()
6770
      self._ReadExportParams(export_info)
6771

    
6772
    _CheckDiskTemplate(self.op.disk_template)
6773

    
6774
    if (not self.cfg.GetVGName() and
6775
        self.op.disk_template not in constants.DTS_NOT_LVM):
6776
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6777
                                 " instances", errors.ECODE_STATE)
6778

    
6779
    if self.op.hypervisor is None:
6780
      self.op.hypervisor = self.cfg.GetHypervisorType()
6781

    
6782
    cluster = self.cfg.GetClusterInfo()
6783
    enabled_hvs = cluster.enabled_hypervisors
6784
    if self.op.hypervisor not in enabled_hvs:
6785
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6786
                                 " cluster (%s)" % (self.op.hypervisor,
6787
                                  ",".join(enabled_hvs)),
6788
                                 errors.ECODE_STATE)
6789

    
6790
    # check hypervisor parameter syntax (locally)
6791
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6792
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6793
                                      self.op.hvparams)
6794
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6795
    hv_type.CheckParameterSyntax(filled_hvp)
6796
    self.hv_full = filled_hvp
6797
    # check that we don't specify global parameters on an instance
6798
    _CheckGlobalHvParams(self.op.hvparams)
6799

    
6800
    # fill and remember the beparams dict
6801
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6802
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6803

    
6804
    # build os parameters
6805
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6806

    
6807
    # now that hvp/bep are in final format, let's reset to defaults,
6808
    # if told to do so
6809
    if self.op.identify_defaults:
6810
      self._RevertToDefaults(cluster)
6811

    
6812
    # NIC buildup
6813
    self.nics = []
6814
    for idx, nic in enumerate(self.op.nics):
6815
      nic_mode_req = nic.get("mode", None)
6816
      nic_mode = nic_mode_req
6817
      if nic_mode is None:
6818
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6819

    
6820
      # in routed mode, for the first nic, the default ip is 'auto'
6821
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6822
        default_ip_mode = constants.VALUE_AUTO
6823
      else:
6824
        default_ip_mode = constants.VALUE_NONE
6825

    
6826
      # ip validity checks
6827
      ip = nic.get("ip", default_ip_mode)
6828
      if ip is None or ip.lower() == constants.VALUE_NONE:
6829
        nic_ip = None
6830
      elif ip.lower() == constants.VALUE_AUTO:
6831
        if not self.op.name_check:
6832
          raise errors.OpPrereqError("IP address set to auto but name checks"
6833
                                     " have been skipped. Aborting.",
6834
                                     errors.ECODE_INVAL)
6835
        nic_ip = self.hostname1.ip
6836
      else:
6837
        if not utils.IsValidIP(ip):
6838
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6839
                                     " like a valid IP" % ip,
6840
                                     errors.ECODE_INVAL)
6841
        nic_ip = ip
6842

    
6843
      # TODO: check the ip address for uniqueness
6844
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6845
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6846
                                   errors.ECODE_INVAL)
6847

    
6848
      # MAC address verification
6849
      mac = nic.get("mac", constants.VALUE_AUTO)
6850
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6851
        mac = utils.NormalizeAndValidateMac(mac)
6852

    
6853
        try:
6854
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6855
        except errors.ReservationError:
6856
          raise errors.OpPrereqError("MAC address %s already in use"
6857
                                     " in cluster" % mac,
6858
                                     errors.ECODE_NOTUNIQUE)
6859

    
6860
      # bridge verification
6861
      bridge = nic.get("bridge", None)
6862
      link = nic.get("link", None)
6863
      if bridge and link:
6864
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6865
                                   " at the same time", errors.ECODE_INVAL)
6866
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6867
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6868
                                   errors.ECODE_INVAL)
6869
      elif bridge:
6870
        link = bridge
6871

    
6872
      nicparams = {}
6873
      if nic_mode_req:
6874
        nicparams[constants.NIC_MODE] = nic_mode_req
6875
      if link:
6876
        nicparams[constants.NIC_LINK] = link
6877

    
6878
      check_params = cluster.SimpleFillNIC(nicparams)
6879
      objects.NIC.CheckParameterSyntax(check_params)
6880
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6881

    
6882
    # disk checks/pre-build
6883
    self.disks = []
6884
    for disk in self.op.disks:
6885
      mode = disk.get("mode", constants.DISK_RDWR)
6886
      if mode not in constants.DISK_ACCESS_SET:
6887
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6888
                                   mode, errors.ECODE_INVAL)
6889
      size = disk.get("size", None)
6890
      if size is None:
6891
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6892
      try:
6893
        size = int(size)
6894
      except (TypeError, ValueError):
6895
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6896
                                   errors.ECODE_INVAL)
6897
      new_disk = {"size": size, "mode": mode}
6898
      if "adopt" in disk:
6899
        new_disk["adopt"] = disk["adopt"]
6900
      self.disks.append(new_disk)
6901

    
6902
    if self.op.mode == constants.INSTANCE_IMPORT:
6903

    
6904
      # Check that the new instance doesn't have fewer disks than the export
6905
      instance_disks = len(self.disks)
6906
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6907
      if instance_disks < export_disks:
6908
        raise errors.OpPrereqError("Not enough disks to import."
6909
                                   " (instance: %d, export: %d)" %
6910
                                   (instance_disks, export_disks),
6911
                                   errors.ECODE_INVAL)
6912

    
6913
      disk_images = []
6914
      for idx in range(export_disks):
6915
        option = 'disk%d_dump' % idx
6916
        if export_info.has_option(constants.INISECT_INS, option):
6917
          # FIXME: are the old os-es, disk sizes, etc. useful?
6918
          export_name = export_info.get(constants.INISECT_INS, option)
6919
          image = utils.PathJoin(self.op.src_path, export_name)
6920
          disk_images.append(image)
6921
        else:
6922
          disk_images.append(False)
6923

    
6924
      self.src_images = disk_images
6925

    
6926
      old_name = export_info.get(constants.INISECT_INS, 'name')
6927
      try:
6928
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6929
      except (TypeError, ValueError), err:
6930
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6931
                                   " an integer: %s" % str(err),
6932
                                   errors.ECODE_STATE)
6933
      if self.op.instance_name == old_name:
6934
        for idx, nic in enumerate(self.nics):
6935
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6936
            nic_mac_ini = 'nic%d_mac' % idx
6937
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6938

    
6939
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6940

    
6941
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6942
    if self.op.ip_check:
6943
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6944
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6945
                                   (self.check_ip, self.op.instance_name),
6946
                                   errors.ECODE_NOTUNIQUE)
6947

    
6948
    #### mac address generation
6949
    # By generating here the mac address both the allocator and the hooks get
6950
    # the real final mac address rather than the 'auto' or 'generate' value.
6951
    # There is a race condition between the generation and the instance object
6952
    # creation, which means that we know the mac is valid now, but we're not
6953
    # sure it will be when we actually add the instance. If things go bad
6954
    # adding the instance will abort because of a duplicate mac, and the
6955
    # creation job will fail.
6956
    for nic in self.nics:
6957
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6958
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6959

    
6960
    #### allocator run
6961

    
6962
    if self.op.iallocator is not None:
6963
      self._RunAllocator()
6964

    
6965
    #### node related checks
6966

    
6967
    # check primary node
6968
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6969
    assert self.pnode is not None, \
6970
      "Cannot retrieve locked node %s" % self.op.pnode
6971
    if pnode.offline:
6972
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6973
                                 pnode.name, errors.ECODE_STATE)
6974
    if pnode.drained:
6975
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6976
                                 pnode.name, errors.ECODE_STATE)
6977

    
6978
    self.secondaries = []
6979

    
6980
    # mirror node verification
6981
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6982
      if self.op.snode is None:
6983
        raise errors.OpPrereqError("The networked disk templates need"
6984
                                   " a mirror node", errors.ECODE_INVAL)
6985
      if self.op.snode == pnode.name:
6986
        raise errors.OpPrereqError("The secondary node cannot be the"
6987
                                   " primary node.", errors.ECODE_INVAL)
6988
      _CheckNodeOnline(self, self.op.snode)
6989
      _CheckNodeNotDrained(self, self.op.snode)
6990
      self.secondaries.append(self.op.snode)
6991

    
6992
    nodenames = [pnode.name] + self.secondaries
6993

    
6994
    req_size = _ComputeDiskSize(self.op.disk_template,
6995
                                self.disks)
6996

    
6997
    # Check lv size requirements, if not adopting
6998
    if req_size is not None and not self.adopt_disks:
6999
      _CheckNodesFreeDisk(self, nodenames, req_size)
7000

    
7001
    if self.adopt_disks: # instead, we must check the adoption data
7002
      all_lvs = set([i["adopt"] for i in self.disks])
7003
      if len(all_lvs) != len(self.disks):
7004
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7005
                                   errors.ECODE_INVAL)
7006
      for lv_name in all_lvs:
7007
        try:
7008
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7009
        except errors.ReservationError:
7010
          raise errors.OpPrereqError("LV named %s used by another instance" %
7011
                                     lv_name, errors.ECODE_NOTUNIQUE)
7012

    
7013
      node_lvs = self.rpc.call_lv_list([pnode.name],
7014
                                       self.cfg.GetVGName())[pnode.name]
7015
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7016
      node_lvs = node_lvs.payload
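      # each payload entry is indexed by LV name; field 0 is the size
      # (reused below to fill in dsk["size"]) and field 2 is treated as the
      # 'online' flag, since online volumes cannot be adopted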
      delta = all_lvs.difference(node_lvs.keys())
7018
      if delta:
7019
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7020
                                   utils.CommaJoin(delta),
7021
                                   errors.ECODE_INVAL)
7022
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7023
      if online_lvs:
7024
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7025
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7026
                                   errors.ECODE_STATE)
7027
      # update the size of disk based on what is found
7028
      for dsk in self.disks:
7029
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7030

    
7031
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7032

    
7033
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7034
    # check OS parameters (remotely)
7035
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7036

    
7037
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7038

    
7039
    # memory check on primary node
7040
    if self.op.start:
7041
      _CheckNodeFreeMemory(self, self.pnode.name,
7042
                           "creating instance %s" % self.op.instance_name,
7043
                           self.be_full[constants.BE_MEMORY],
7044
                           self.op.hypervisor)
7045

    
7046
    self.dry_run_result = list(nodenames)
7047

    
7048
  def Exec(self, feedback_fn):
7049
    """Create and add the instance to the cluster.
7050

7051
    """
7052
    instance = self.op.instance_name
7053
    pnode_name = self.pnode.name
7054

    
7055
    ht_kind = self.op.hypervisor
7056
    if ht_kind in constants.HTS_REQ_PORT:
7057
      network_port = self.cfg.AllocatePort()
7058
    else:
7059
      network_port = None
7060

    
7061
    if constants.ENABLE_FILE_STORAGE:
7062
      # this is needed because os.path.join does not accept None arguments
7063
      if self.op.file_storage_dir is None:
7064
        string_file_storage_dir = ""
7065
      else:
7066
        string_file_storage_dir = self.op.file_storage_dir
7067

    
7068
      # build the full file storage dir path
7069
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7070
                                        string_file_storage_dir, instance)
7071
    else:
7072
      file_storage_dir = ""
7073

    
7074
    disks = _GenerateDiskTemplate(self,
7075
                                  self.op.disk_template,
7076
                                  instance, pnode_name,
7077
                                  self.secondaries,
7078
                                  self.disks,
7079
                                  file_storage_dir,
7080
                                  self.op.file_driver,
7081
                                  0)
7082

    
7083
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7084
                            primary_node=pnode_name,
7085
                            nics=self.nics, disks=disks,
7086
                            disk_template=self.op.disk_template,
7087
                            admin_up=False,
7088
                            network_port=network_port,
7089
                            beparams=self.op.beparams,
7090
                            hvparams=self.op.hvparams,
7091
                            hypervisor=self.op.hypervisor,
7092
                            osparams=self.op.osparams,
7093
                            )
7094

    
7095
    if self.adopt_disks:
7096
      # rename LVs to the newly-generated names; we need to construct
7097
      # 'fake' LV disks with the old data, plus the new unique_id
7098
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7099
      rename_to = []
7100
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7101
        rename_to.append(t_dsk.logical_id)
7102
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7103
        self.cfg.SetDiskID(t_dsk, pnode_name)
7104
      result = self.rpc.call_blockdev_rename(pnode_name,
7105
                                             zip(tmp_disks, rename_to))
7106
      result.Raise("Failed to rename adoped LVs")
7107
    else:
7108
      feedback_fn("* creating instance disks...")
7109
      try:
7110
        _CreateDisks(self, iobj)
7111
      except errors.OpExecError:
7112
        self.LogWarning("Device creation failed, reverting...")
7113
        try:
7114
          _RemoveDisks(self, iobj)
7115
        finally:
7116
          self.cfg.ReleaseDRBDMinors(instance)
7117
          raise
7118

    
7119
    feedback_fn("adding instance %s to cluster config" % instance)
7120

    
7121
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7122

    
7123
    # Declare that we don't want to remove the instance lock anymore, as we've
7124
    # added the instance to the config
7125
    del self.remove_locks[locking.LEVEL_INSTANCE]
7126
    # Unlock all the nodes
7127
    if self.op.mode == constants.INSTANCE_IMPORT:
7128
      nodes_keep = [self.op.src_node]
7129
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7130
                       if node != self.op.src_node]
7131
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7132
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7133
    else:
7134
      self.context.glm.release(locking.LEVEL_NODE)
7135
      del self.acquired_locks[locking.LEVEL_NODE]
7136

    
7137
    if self.op.wait_for_sync:
7138
      disk_abort = not _WaitForSync(self, iobj)
7139
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7140
      # make sure the disks are not degraded (still sync-ing is ok)
7141
      time.sleep(15)
7142
      feedback_fn("* checking mirrors status")
7143
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7144
    else:
7145
      disk_abort = False
7146

    
7147
    if disk_abort:
7148
      _RemoveDisks(self, iobj)
7149
      self.cfg.RemoveInstance(iobj.name)
7150
      # Make sure the instance lock gets removed
7151
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7152
      raise errors.OpExecError("There are some degraded disks for"
7153
                               " this instance")
7154

    
7155
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7156
      if self.op.mode == constants.INSTANCE_CREATE:
7157
        if not self.op.no_install:
7158
          feedback_fn("* running the instance OS create scripts...")
7159
          # FIXME: pass debug option from opcode to backend
7160
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7161
                                                 self.op.debug_level)
7162
          result.Raise("Could not add os for instance %s"
7163
                       " on node %s" % (instance, pnode_name))
7164

    
7165
      elif self.op.mode == constants.INSTANCE_IMPORT:
7166
        feedback_fn("* running the instance OS import scripts...")
7167

    
7168
        transfers = []
7169

    
7170
        for idx, image in enumerate(self.src_images):
7171
          if not image:
7172
            continue
7173

    
7174
          # FIXME: pass debug option from opcode to backend
7175
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7176
                                             constants.IEIO_FILE, (image, ),
7177
                                             constants.IEIO_SCRIPT,
7178
                                             (iobj.disks[idx], idx),
7179
                                             None)
7180
          transfers.append(dt)
7181

    
7182
        import_result = \
7183
          masterd.instance.TransferInstanceData(self, feedback_fn,
7184
                                                self.op.src_node, pnode_name,
7185
                                                self.pnode.secondary_ip,
7186
                                                iobj, transfers)
7187
        if not compat.all(import_result):
7188
          self.LogWarning("Some disks for instance %s on node %s were not"
7189
                          " imported successfully" % (instance, pnode_name))
7190

    
7191
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7192
        feedback_fn("* preparing remote import...")
7193
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7194
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7195

    
7196
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7197
                                                     self.source_x509_ca,
7198
                                                     self._cds, timeouts)
7199
        if not compat.all(disk_results):
7200
          # TODO: Should the instance still be started, even if some disks
7201
          # failed to import (valid for local imports, too)?
7202
          self.LogWarning("Some disks for instance %s on node %s were not"
7203
                          " imported successfully" % (instance, pnode_name))
7204

    
7205
        # Run rename script on newly imported instance
7206
        assert iobj.name == instance
7207
        feedback_fn("Running rename script for %s" % instance)
7208
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7209
                                                   self.source_instance_name,
7210
                                                   self.op.debug_level)
7211
        if result.fail_msg:
7212
          self.LogWarning("Failed to run rename script for %s on node"
7213
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7214

    
7215
      else:
7216
        # also checked in the prereq part
7217
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7218
                                     % self.op.mode)
7219

    
7220
    if self.op.start:
7221
      iobj.admin_up = True
7222
      self.cfg.Update(iobj, feedback_fn)
7223
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7224
      feedback_fn("* starting instance...")
7225
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7226
      result.Raise("Could not start instance")
7227

    
7228
    return list(iobj.all_nodes)
7229

    
7230

    
7231
class LUConnectConsole(NoHooksLU):
7232
  """Connect to an instance's console.
7233

7234
  This is somewhat special in that it returns the command line that
7235
  you need to run on the master node in order to connect to the
7236
  console.
7237

7238
  """
7239
  _OP_REQP = [("instance_name", _TNEString)]
7240
  REQ_BGL = False
7241

    
7242
  def ExpandNames(self):
7243
    self._ExpandAndLockInstance()
7244

    
7245
  def CheckPrereq(self):
7246
    """Check prerequisites.
7247

7248
    This checks that the instance is in the cluster.
7249

7250
    """
7251
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7252
    assert self.instance is not None, \
7253
      "Cannot retrieve locked instance %s" % self.op.instance_name
7254
    _CheckNodeOnline(self, self.instance.primary_node)
7255

    
7256
  def Exec(self, feedback_fn):
7257
    """Connect to the console of an instance
7258

7259
    """
7260
    instance = self.instance
7261
    node = instance.primary_node
7262

    
7263
    node_insts = self.rpc.call_instance_list([node],
7264
                                             [instance.hypervisor])[node]
7265
    node_insts.Raise("Can't get node information from %s" % node)
7266

    
7267
    if instance.name not in node_insts.payload:
7268
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7269

    
7270
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7271

    
7272
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7273
    cluster = self.cfg.GetClusterInfo()
7274
    # beparams and hvparams are passed separately, to avoid editing the
7275
    # instance and then saving the defaults in the instance itself.
7276
    hvparams = cluster.FillHV(instance)
7277
    beparams = cluster.FillBE(instance)
7278
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7279

    
7280
    # build ssh cmdline
7281
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
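    # The returned value is the argument list for an interactive (tty) ssh
    # session as root on the primary node, running the hypervisor's console
    # command there; conceptually (hypothetical node name) something like
    #   ssh -t ... root@node1.example.com '<hypervisor console command>'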
7282

    
7283

    
7284
class LUReplaceDisks(LogicalUnit):
7285
  """Replace the disks of an instance.
7286

7287
  """
7288
  HPATH = "mirrors-replace"
7289
  HTYPE = constants.HTYPE_INSTANCE
7290
  _OP_REQP = [
7291
    ("instance_name", _TNEString),
7292
    ("mode", _TElemOf(constants.REPLACE_MODES)),
7293
    ("disks", _TListOf(_TPInt)),
7294
    ]
7295
  _OP_DEFS = [
7296
    ("remote_node", None),
7297
    ("iallocator", None),
7298
    ("early_release", None),
7299
    ]
7300
  REQ_BGL = False
7301

    
7302
  def CheckArguments(self):
7303
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7304
                                  self.op.iallocator)
7305

    
7306
  def ExpandNames(self):
7307
    self._ExpandAndLockInstance()
7308

    
7309
    if self.op.iallocator is not None:
7310
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7311

    
7312
    elif self.op.remote_node is not None:
7313
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7314
      self.op.remote_node = remote_node
7315

    
7316
      # Warning: do not remove the locking of the new secondary here
7317
      # unless DRBD8.AddChildren is changed to work in parallel;
7318
      # currently it doesn't since parallel invocations of
7319
      # FindUnusedMinor will conflict
7320
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7321
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7322

    
7323
    else:
7324
      self.needed_locks[locking.LEVEL_NODE] = []
7325
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7326

    
7327
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7328
                                   self.op.iallocator, self.op.remote_node,
7329
                                   self.op.disks, False, self.op.early_release)
7330

    
7331
    self.tasklets = [self.replacer]
7332

    
7333
  def DeclareLocks(self, level):
7334
    # If we're not already locking all nodes in the set we have to declare the
7335
    # instance's primary/secondary nodes.
7336
    if (level == locking.LEVEL_NODE and
7337
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7338
      self._LockInstancesNodes()
7339

    
7340
  def BuildHooksEnv(self):
7341
    """Build hooks env.
7342

7343
    This runs on the master, the primary and all the secondaries.
7344

7345
    """
7346
    instance = self.replacer.instance
7347
    env = {
7348
      "MODE": self.op.mode,
7349
      "NEW_SECONDARY": self.op.remote_node,
7350
      "OLD_SECONDARY": instance.secondary_nodes[0],
7351
      }
7352
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7353
    nl = [
7354
      self.cfg.GetMasterNode(),
7355
      instance.primary_node,
7356
      ]
7357
    if self.op.remote_node is not None:
7358
      nl.append(self.op.remote_node)
7359
    return env, nl, nl
7360

    
7361

    
7362
class TLReplaceDisks(Tasklet):
7363
  """Replaces disks for an instance.
7364

7365
  Note: Locking is not within the scope of this class.
7366

7367
  """
7368
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7369
               disks, delay_iallocator, early_release):
7370
    """Initializes this class.
7371

7372
    """
7373
    Tasklet.__init__(self, lu)
7374

    
7375
    # Parameters
7376
    self.instance_name = instance_name
7377
    self.mode = mode
7378
    self.iallocator_name = iallocator_name
7379
    self.remote_node = remote_node
7380
    self.disks = disks
7381
    self.delay_iallocator = delay_iallocator
7382
    self.early_release = early_release
7383

    
7384
    # Runtime data
7385
    self.instance = None
7386
    self.new_node = None
7387
    self.target_node = None
7388
    self.other_node = None
7389
    self.remote_node_info = None
7390
    self.node_secondary_ip = None
7391

    
7392
  @staticmethod
7393
  def CheckArguments(mode, remote_node, iallocator):
7394
    """Helper function for users of this class.
7395

7396
    """
7397
    # check for valid parameter combination
7398
    if mode == constants.REPLACE_DISK_CHG:
7399
      if remote_node is None and iallocator is None:
7400
        raise errors.OpPrereqError("When changing the secondary either an"
7401
                                   " iallocator script must be used or the"
7402
                                   " new node given", errors.ECODE_INVAL)
7403

    
7404
      if remote_node is not None and iallocator is not None:
7405
        raise errors.OpPrereqError("Give either the iallocator or the new"
7406
                                   " secondary, not both", errors.ECODE_INVAL)
7407

    
7408
    elif remote_node is not None or iallocator is not None:
7409
      # Not replacing the secondary
7410
      raise errors.OpPrereqError("The iallocator and new node options can"
7411
                                 " only be used when changing the"
7412
                                 " secondary node", errors.ECODE_INVAL)
7413

    
7414
  @staticmethod
7415
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7416
    """Compute a new secondary node using an IAllocator.
7417

7418
    """
7419
    ial = IAllocator(lu.cfg, lu.rpc,
7420
                     mode=constants.IALLOCATOR_MODE_RELOC,
7421
                     name=instance_name,
7422
                     relocate_from=relocate_from)
7423

    
7424
    ial.Run(iallocator_name)
7425

    
7426
    if not ial.success:
7427
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7428
                                 " %s" % (iallocator_name, ial.info),
7429
                                 errors.ECODE_NORES)
7430

    
7431
    if len(ial.result) != ial.required_nodes:
7432
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7433
                                 " of nodes (%s), required %s" %
7434
                                 (iallocator_name,
7435
                                  len(ial.result), ial.required_nodes),
7436
                                 errors.ECODE_FAULT)
7437

    
7438
    remote_node_name = ial.result[0]
7439

    
7440
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7441
               instance_name, remote_node_name)
7442

    
7443
    return remote_node_name
7444

    
7445
  def _FindFaultyDisks(self, node_name):
7446
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7447
                                    node_name, True)
7448

    
7449
  def CheckPrereq(self):
7450
    """Check prerequisites.
7451

7452
    This checks that the instance is in the cluster.
7453

7454
    """
7455
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7456
    assert instance is not None, \
7457
      "Cannot retrieve locked instance %s" % self.instance_name
7458

    
7459
    if instance.disk_template != constants.DT_DRBD8:
7460
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7461
                                 " instances", errors.ECODE_INVAL)
7462

    
7463
    if len(instance.secondary_nodes) != 1:
7464
      raise errors.OpPrereqError("The instance has a strange layout,"
7465
                                 " expected one secondary but found %d" %
7466
                                 len(instance.secondary_nodes),
7467
                                 errors.ECODE_FAULT)
7468

    
7469
    if not self.delay_iallocator:
7470
      self._CheckPrereq2()
7471

    
7472
  def _CheckPrereq2(self):
7473
    """Check prerequisites, second part.
7474

7475
    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because, during node evacuation, the iallocator used
    to be called with an unmodified cluster model that did not take the
    planned changes into account.
7479

7480
    """
7481
    instance = self.instance
7482
    secondary_node = instance.secondary_nodes[0]
7483

    
7484
    if self.iallocator_name is None:
7485
      remote_node = self.remote_node
7486
    else:
7487
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7488
                                       instance.name, instance.secondary_nodes)
7489

    
7490
    if remote_node is not None:
7491
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7492
      assert self.remote_node_info is not None, \
7493
        "Cannot retrieve locked node %s" % remote_node
7494
    else:
7495
      self.remote_node_info = None
7496

    
7497
    if remote_node == self.instance.primary_node:
7498
      raise errors.OpPrereqError("The specified node is the primary node of"
7499
                                 " the instance.", errors.ECODE_INVAL)
7500

    
7501
    if remote_node == secondary_node:
7502
      raise errors.OpPrereqError("The specified node is already the"
7503
                                 " secondary node of the instance.",
7504
                                 errors.ECODE_INVAL)
7505

    
7506
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7507
                                    constants.REPLACE_DISK_CHG):
7508
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7509
                                 errors.ECODE_INVAL)
7510

    
7511
    if self.mode == constants.REPLACE_DISK_AUTO:
7512
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7513
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7514

    
7515
      if faulty_primary and faulty_secondary:
7516
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7517
                                   " one node and can not be repaired"
7518
                                   " automatically" % self.instance_name,
7519
                                   errors.ECODE_STATE)
7520

    
7521
      if faulty_primary:
7522
        self.disks = faulty_primary
7523
        self.target_node = instance.primary_node
7524
        self.other_node = secondary_node
7525
        check_nodes = [self.target_node, self.other_node]
7526
      elif faulty_secondary:
7527
        self.disks = faulty_secondary
7528
        self.target_node = secondary_node
7529
        self.other_node = instance.primary_node
7530
        check_nodes = [self.target_node, self.other_node]
7531
      else:
7532
        self.disks = []
7533
        check_nodes = []
7534

    
7535
    else:
7536
      # Non-automatic modes
7537
      if self.mode == constants.REPLACE_DISK_PRI:
7538
        self.target_node = instance.primary_node
7539
        self.other_node = secondary_node
7540
        check_nodes = [self.target_node, self.other_node]
7541

    
7542
      elif self.mode == constants.REPLACE_DISK_SEC:
7543
        self.target_node = secondary_node
7544
        self.other_node = instance.primary_node
7545
        check_nodes = [self.target_node, self.other_node]
7546

    
7547
      elif self.mode == constants.REPLACE_DISK_CHG:
7548
        self.new_node = remote_node
7549
        self.other_node = instance.primary_node
7550
        self.target_node = secondary_node
7551
        check_nodes = [self.new_node, self.other_node]
7552

    
7553
        _CheckNodeNotDrained(self.lu, remote_node)
7554

    
7555
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7556
        assert old_node_info is not None
7557
        if old_node_info.offline and not self.early_release:
7558
          # doesn't make sense to delay the release
7559
          self.early_release = True
7560
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7561
                          " early-release mode", secondary_node)
7562

    
7563
      else:
7564
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7565
                                     self.mode)
7566

    
7567
      # If not specified all disks should be replaced
7568
      if not self.disks:
7569
        self.disks = range(len(self.instance.disks))
7570

    
7571
    for node in check_nodes:
7572
      _CheckNodeOnline(self.lu, node)
7573

    
7574
    # Check whether disks are valid
7575
    for disk_idx in self.disks:
7576
      instance.FindDisk(disk_idx)
7577

    
7578
    # Get secondary node IP addresses
7579
    node_2nd_ip = {}
7580

    
7581
    for node_name in [self.target_node, self.other_node, self.new_node]:
7582
      if node_name is not None:
7583
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7584

    
7585
    self.node_secondary_ip = node_2nd_ip
7586

    
7587
  def Exec(self, feedback_fn):
7588
    """Execute disk replacement.
7589

7590
    This dispatches the disk replacement to the appropriate handler.
7591

7592
    """
7593
    if self.delay_iallocator:
7594
      self._CheckPrereq2()
7595

    
7596
    if not self.disks:
7597
      feedback_fn("No disks need replacement")
7598
      return
7599

    
7600
    feedback_fn("Replacing disk(s) %s for %s" %
7601
                (utils.CommaJoin(self.disks), self.instance.name))
7602

    
7603
    activate_disks = (not self.instance.admin_up)
7604

    
7605
    # Activate the instance disks if we're replacing them on a down instance
7606
    if activate_disks:
7607
      _StartInstanceDisks(self.lu, self.instance, True)
7608

    
7609
    try:
7610
      # Should we replace the secondary node?
7611
      if self.new_node is not None:
7612
        fn = self._ExecDrbd8Secondary
7613
      else:
7614
        fn = self._ExecDrbd8DiskOnly
7615

    
7616
      return fn(feedback_fn)
7617

    
7618
    finally:
7619
      # Deactivate the instance disks if we're replacing them on a
7620
      # down instance
7621
      if activate_disks:
7622
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7623

    
7624
  def _CheckVolumeGroup(self, nodes):
7625
    self.lu.LogInfo("Checking volume groups")
7626

    
7627
    vgname = self.cfg.GetVGName()
7628

    
7629
    # Make sure volume group exists on all involved nodes
7630
    results = self.rpc.call_vg_list(nodes)
7631
    if not results:
7632
      raise errors.OpExecError("Can't list volume groups on the nodes")
7633

    
7634
    for node in nodes:
7635
      res = results[node]
7636
      res.Raise("Error checking node %s" % node)
7637
      if vgname not in res.payload:
7638
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7639
                                 (vgname, node))
7640

    
7641
  def _CheckDisksExistence(self, nodes):
7642
    # Check disk existence
7643
    for idx, dev in enumerate(self.instance.disks):
7644
      if idx not in self.disks:
7645
        continue
7646

    
7647
      for node in nodes:
7648
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7649
        self.cfg.SetDiskID(dev, node)
7650

    
7651
        result = self.rpc.call_blockdev_find(node, dev)
7652

    
7653
        msg = result.fail_msg
7654
        if msg or not result.payload:
7655
          if not msg:
7656
            msg = "disk not found"
7657
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7658
                                   (idx, node, msg))
7659

    
7660
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7661
    for idx, dev in enumerate(self.instance.disks):
7662
      if idx not in self.disks:
7663
        continue
7664

    
7665
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7666
                      (idx, node_name))
7667

    
7668
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7669
                                   ldisk=ldisk):
7670
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7671
                                 " replace disks for instance %s" %
7672
                                 (node_name, self.instance.name))
7673

    
7674
  def _CreateNewStorage(self, node_name):
7675
    vgname = self.cfg.GetVGName()
7676
    iv_names = {}
7677

    
7678
    for idx, dev in enumerate(self.instance.disks):
7679
      if idx not in self.disks:
7680
        continue
7681

    
7682
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7683

    
7684
      self.cfg.SetDiskID(dev, node_name)
7685

    
7686
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7687
      names = _GenerateUniqueNames(self.lu, lv_names)
7688

    
7689
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7690
                             logical_id=(vgname, names[0]))
7691
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7692
                             logical_id=(vgname, names[1]))
7693

    
7694
      new_lvs = [lv_data, lv_meta]
7695
      old_lvs = dev.children
7696
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7697

    
7698
      # we pass force_create=True to force the LVM creation
7699
      for new_lv in new_lvs:
7700
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7701
                        _GetInstanceInfoText(self.instance), False)
7702

    
7703
    return iv_names
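    # iv_names maps each DRBD device's iv_name to a (drbd_dev, old_lvs,
    # new_lvs) tuple, e.g. (hypothetical):
    #   {"disk/0": (<drbd disk>, [old data LV, old meta LV],
    #               [new data LV, new meta LV])}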
7704

    
7705
  def _CheckDevices(self, node_name, iv_names):
7706
    for name, (dev, _, _) in iv_names.iteritems():
7707
      self.cfg.SetDiskID(dev, node_name)
7708

    
7709
      result = self.rpc.call_blockdev_find(node_name, dev)
7710

    
7711
      msg = result.fail_msg
7712
      if msg or not result.payload:
7713
        if not msg:
7714
          msg = "disk not found"
7715
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7716
                                 (name, msg))
7717

    
7718
      if result.payload.is_degraded:
7719
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7720

    
7721
  def _RemoveOldStorage(self, node_name, iv_names):
7722
    for name, (_, old_lvs, _) in iv_names.iteritems():
7723
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7724

    
7725
      for lv in old_lvs:
7726
        self.cfg.SetDiskID(lv, node_name)
7727

    
7728
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7729
        if msg:
7730
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7731
                             hint="remove unused LVs manually")
7732

    
7733
  def _ReleaseNodeLock(self, node_name):
7734
    """Releases the lock for a given node."""
7735
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7736

    
7737
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7738
    """Replace a disk on the primary or secondary for DRBD 8.
7739

7740
    The algorithm for replace is quite complicated:
7741

7742
      1. for each disk to be replaced:
7743

7744
        1. create new LVs on the target node with unique names
7745
        1. detach old LVs from the drbd device
7746
        1. rename old LVs to name_replaced.<time_t>
7747
        1. rename new LVs to old LVs
7748
        1. attach the new LVs (with the old names now) to the drbd device
7749

7750
      1. wait for sync across all devices
7751

7752
      1. for each modified disk:
7753

7754
        1. remove old LVs (which have the name name_replaces.<time_t>)
7755

7756
    Failures are not very well handled.
7757

7758
    """
7759
    steps_total = 6
7760

    
7761
    # Step: check device activation
7762
    self.lu.LogStep(1, steps_total, "Check device existence")
7763
    self._CheckDisksExistence([self.other_node, self.target_node])
7764
    self._CheckVolumeGroup([self.target_node, self.other_node])
7765

    
7766
    # Step: check other node consistency
7767
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7768
    self._CheckDisksConsistency(self.other_node,
7769
                                self.other_node == self.instance.primary_node,
7770
                                False)
7771

    
7772
    # Step: create new storage
7773
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7774
    iv_names = self._CreateNewStorage(self.target_node)
7775

    
7776
    # Step: for each lv, detach+rename*2+attach
7777
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7778
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7779
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7780

    
7781
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7782
                                                     old_lvs)
7783
      result.Raise("Can't detach drbd from local storage on node"
7784
                   " %s for device %s" % (self.target_node, dev.iv_name))
7785
      #dev.children = []
7786
      #cfg.Update(instance)
7787

    
7788
      # ok, we created the new LVs, so now we know we have the needed
7789
      # storage; as such, we proceed on the target node to rename
7790
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7791
      # using the assumption that logical_id == physical_id (which in
7792
      # turn is the unique_id on that node)
7793

    
7794
      # FIXME(iustin): use a better name for the replaced LVs
7795
      temp_suffix = int(time.time())
7796
      ren_fn = lambda d, suff: (d.physical_id[0],
7797
                                d.physical_id[1] + "_replaced-%s" % suff)
7798

    
7799
      # Build the rename list based on what LVs exist on the node
7800
      rename_old_to_new = []
7801
      for to_ren in old_lvs:
7802
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7803
        if not result.fail_msg and result.payload:
7804
          # device exists
7805
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7806

    
7807
      self.lu.LogInfo("Renaming the old LVs on the target node")
7808
      result = self.rpc.call_blockdev_rename(self.target_node,
7809
                                             rename_old_to_new)
7810
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7811

    
7812
      # Now we rename the new LVs to the old LVs
7813
      self.lu.LogInfo("Renaming the new LVs on the target node")
7814
      rename_new_to_old = [(new, old.physical_id)
7815
                           for old, new in zip(old_lvs, new_lvs)]
7816
      result = self.rpc.call_blockdev_rename(self.target_node,
7817
                                             rename_new_to_old)
7818
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7819

    
7820
      for old, new in zip(old_lvs, new_lvs):
7821
        new.logical_id = old.logical_id
7822
        self.cfg.SetDiskID(new, self.target_node)
7823

    
7824
      for disk in old_lvs:
7825
        disk.logical_id = ren_fn(disk, temp_suffix)
7826
        self.cfg.SetDiskID(disk, self.target_node)
7827

    
7828
      # Now that the new lvs have the old name, we can add them to the device
7829
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7830
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7831
                                                  new_lvs)
7832
      msg = result.fail_msg
7833
      if msg:
7834
        for new_lv in new_lvs:
7835
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7836
                                               new_lv).fail_msg
7837
          if msg2:
7838
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7839
                               hint=("cleanup manually the unused logical"
7840
                                     "volumes"))
7841
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7842

    
7843
      dev.children = new_lvs
7844

    
7845
      self.cfg.Update(self.instance, feedback_fn)
7846

    
7847
    cstep = 5
7848
    if self.early_release:
7849
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7850
      cstep += 1
7851
      self._RemoveOldStorage(self.target_node, iv_names)
7852
      # WARNING: we release both node locks here, do not do other RPCs
7853
      # than WaitForSync to the primary node
7854
      self._ReleaseNodeLock([self.target_node, self.other_node])
7855

    
7856
    # Wait for sync
7857
    # This can fail as the old devices are degraded and _WaitForSync
7858
    # does a combined result over all disks, so we don't check its return value
7859
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7860
    cstep += 1
7861
    _WaitForSync(self.lu, self.instance)
7862

    
7863
    # Check all devices manually
7864
    self._CheckDevices(self.instance.primary_node, iv_names)
7865

    
7866
    # Step: remove old storage
7867
    if not self.early_release:
7868
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7869
      cstep += 1
7870
      self._RemoveOldStorage(self.target_node, iv_names)
7871

    
7872
  def _ExecDrbd8Secondary(self, feedback_fn):
7873
    """Replace the secondary node for DRBD 8.
7874

7875
    The algorithm for replace is quite complicated:
7876
      - for all disks of the instance:
7877
        - create new LVs on the new node with same names
7878
        - shutdown the drbd device on the old secondary
7879
        - disconnect the drbd network on the primary
7880
        - create the drbd device on the new secondary
7881
        - network attach the drbd on the primary, using an artifice:
7882
          the drbd code for Attach() will connect to the network if it
7883
          finds a device which is connected to the good local disks but
7884
          not network enabled
7885
      - wait for sync across all devices
7886
      - remove all disks from the old secondary
7887

7888
    Failures are not very well handled.
7889

7890
    """
7891
    steps_total = 6
7892

    
7893
    # Step: check device activation
7894
    self.lu.LogStep(1, steps_total, "Check device existence")
7895
    self._CheckDisksExistence([self.instance.primary_node])
7896
    self._CheckVolumeGroup([self.instance.primary_node])
7897

    
7898
    # Step: check other node consistency
7899
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7900
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7901

    
7902
    # Step: create new storage
7903
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7904
    for idx, dev in enumerate(self.instance.disks):
7905
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7906
                      (self.new_node, idx))
7907
      # we pass force_create=True to force LVM creation
7908
      for new_lv in dev.children:
7909
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7910
                        _GetInstanceInfoText(self.instance), False)
7911

    
7912
    # Step 4: drbd minors and drbd setup changes
7913
    # after this, we must manually remove the drbd minors on both the
7914
    # error and the success paths
7915
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7916
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7917
                                         for _ in self.instance.disks],
7918
                                        self.instance.name)
7919
    logging.debug("Allocated minors %r", minors)
7920

    
7921
    iv_names = {}
7922
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7923
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7924
                      (self.new_node, idx))
7925
      # create new devices on new_node; note that we create two IDs:
7926
      # one without port, so the drbd will be activated without
7927
      # networking information on the new node at this stage, and one
7928
      # with network, for the latter activation in step 4
7929
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7930
      if self.instance.primary_node == o_node1:
7931
        p_minor = o_minor1
7932
      else:
7933
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7934
        p_minor = o_minor2
7935

    
7936
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7937
                      p_minor, new_minor, o_secret)
7938
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7939
                    p_minor, new_minor, o_secret)
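      # Hypothetical example: with primary node1, new secondary node3, DRBD
      # port 11000, primary minor 0 and newly allocated minor 2, this gives
      #   new_alone_id = ("node1", "node3", None,  0, 2, o_secret)
      #   new_net_id   = ("node1", "node3", 11000, 0, 2, o_secret)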
7940

    
7941
      iv_names[idx] = (dev, dev.children, new_net_id)
7942
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7943
                    new_net_id)
7944
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7945
                              logical_id=new_alone_id,
7946
                              children=dev.children,
7947
                              size=dev.size)
7948
      try:
7949
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7950
                              _GetInstanceInfoText(self.instance), False)
7951
      except errors.GenericError:
7952
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7953
        raise
7954

    
7955
    # We have new devices, shutdown the drbd on the old secondary
7956
    for idx, dev in enumerate(self.instance.disks):
7957
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7958
      self.cfg.SetDiskID(dev, self.target_node)
7959
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7960
      if msg:
7961
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7962
                           "node: %s" % (idx, msg),
7963
                           hint=("Please cleanup this device manually as"
7964
                                 " soon as possible"))
7965

    
7966
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7967
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7968
                                               self.node_secondary_ip,
7969
                                               self.instance.disks)\
7970
                                              [self.instance.primary_node]
7971

    
7972
    msg = result.fail_msg
7973
    if msg:
7974
      # detaches didn't succeed (unlikely)
7975
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7976
      raise errors.OpExecError("Can't detach the disks from the network on"
7977
                               " old node: %s" % (msg,))
7978

    
7979
    # if we managed to detach at least one, we update all the disks of
7980
    # the instance to point to the new secondary
7981
    self.lu.LogInfo("Updating instance configuration")
7982
    for dev, _, new_logical_id in iv_names.itervalues():
7983
      dev.logical_id = new_logical_id
7984
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7985

    
7986
    self.cfg.Update(self.instance, feedback_fn)
7987

    
7988
    # and now perform the drbd attach
7989
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7990
                    " (standalone => connected)")
7991
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7992
                                            self.new_node],
7993
                                           self.node_secondary_ip,
7994
                                           self.instance.disks,
7995
                                           self.instance.name,
7996
                                           False)
7997
    for to_node, to_result in result.items():
7998
      msg = to_result.fail_msg
7999
      if msg:
8000
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8001
                           to_node, msg,
8002
                           hint=("please do a gnt-instance info to see the"
8003
                                 " status of disks"))
8004
    cstep = 5
8005
    if self.early_release:
8006
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8007
      cstep += 1
8008
      self._RemoveOldStorage(self.target_node, iv_names)
8009
      # WARNING: we release all node locks here, do not do other RPCs
8010
      # than WaitForSync to the primary node
8011
      self._ReleaseNodeLock([self.instance.primary_node,
8012
                             self.target_node,
8013
                             self.new_node])
8014

    
8015
    # Wait for sync
8016
    # This can fail as the old devices are degraded and _WaitForSync
8017
    # does a combined result over all disks, so we don't check its return value
8018
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8019
    cstep += 1
8020
    _WaitForSync(self.lu, self.instance)
8021

    
8022
    # Check all devices manually
8023
    self._CheckDevices(self.instance.primary_node, iv_names)
8024

    
8025
    # Step: remove old storage
8026
    if not self.early_release:
8027
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8028
      self._RemoveOldStorage(self.target_node, iv_names)
8029

    
8030

    
8031
class LURepairNodeStorage(NoHooksLU):
8032
  """Repairs the volume group on a node.
8033

8034
  """
8035
  _OP_REQP = [("node_name", _TNEString)]
8036
  REQ_BGL = False
8037

    
8038
  def CheckArguments(self):
8039
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8040

    
8041
    _CheckStorageType(self.op.storage_type)
8042

    
8043
    storage_type = self.op.storage_type
8044

    
8045
    if (constants.SO_FIX_CONSISTENCY not in
8046
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8047
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8048
                                 " repaired" % storage_type,
8049
                                 errors.ECODE_INVAL)
8050

    
8051
  def ExpandNames(self):
8052
    self.needed_locks = {
8053
      locking.LEVEL_NODE: [self.op.node_name],
8054
      }
8055

    
8056
  def _CheckFaultyDisks(self, instance, node_name):
8057
    """Ensure faulty disks abort the opcode or at least warn."""
8058
    try:
8059
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8060
                                  node_name, True):
8061
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8062
                                   " node '%s'" % (instance.name, node_name),
8063
                                   errors.ECODE_STATE)
8064
    except errors.OpPrereqError, err:
8065
      if self.op.ignore_consistency:
8066
        self.proc.LogWarning(str(err.args[0]))
8067
      else:
8068
        raise
8069

    
8070
  def CheckPrereq(self):
8071
    """Check prerequisites.
8072

8073
    """
8074
    # Check whether any instance on this node has faulty disks
8075
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8076
      if not inst.admin_up:
8077
        continue
8078
      check_nodes = set(inst.all_nodes)
8079
      check_nodes.discard(self.op.node_name)
8080
      for inst_node_name in check_nodes:
8081
        self._CheckFaultyDisks(inst, inst_node_name)
8082

    
8083
  def Exec(self, feedback_fn):
8084
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8085
                (self.op.name, self.op.node_name))
8086

    
8087
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8088
    result = self.rpc.call_storage_execute(self.op.node_name,
8089
                                           self.op.storage_type, st_args,
8090
                                           self.op.name,
8091
                                           constants.SO_FIX_CONSISTENCY)
8092
    result.Raise("Failed to repair storage unit '%s' on %s" %
8093
                 (self.op.name, self.op.node_name))
8094

    
8095

    
8096
class LUNodeEvacuationStrategy(NoHooksLU):
8097
  """Computes the node evacuation strategy.
8098

8099
  """
8100
  _OP_REQP = [("nodes", _TListOf(_TNEString))]
8101
  _OP_DEFS = [
8102
    ("remote_node", None),
8103
    ("iallocator", None),
8104
    ]
8105
  REQ_BGL = False
8106

    
8107
  def CheckArguments(self):
8108
    if self.op.remote_node is not None and self.op.iallocator is not None:
8109
      raise errors.OpPrereqError("Give either the iallocator or the new"
8110
                                 " secondary, not both", errors.ECODE_INVAL)
8111

    
8112
  def ExpandNames(self):
8113
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8114
    self.needed_locks = locks = {}
8115
    if self.op.remote_node is None:
8116
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8117
    else:
8118
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8119
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8120

    
8121
  def Exec(self, feedback_fn):
8122
    if self.op.remote_node is not None:
8123
      instances = []
8124
      for node in self.op.nodes:
8125
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8126
      result = []
8127
      for i in instances:
8128
        if i.primary_node == self.op.remote_node:
8129
          raise errors.OpPrereqError("Node %s is the primary node of"
8130
                                     " instance %s, cannot use it as"
8131
                                     " secondary" %
8132
                                     (self.op.remote_node, i.name),
8133
                                     errors.ECODE_INVAL)
8134
        result.append([i.name, self.op.remote_node])
8135
    else:
8136
      ial = IAllocator(self.cfg, self.rpc,
8137
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8138
                       evac_nodes=self.op.nodes)
8139
      ial.Run(self.op.iallocator, validate=True)
8140
      if not ial.success:
8141
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8142
                                 errors.ECODE_NORES)
8143
      result = ial.result
8144
    return result
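    # In the remote_node case the result is a list of [instance_name,
    # new_secondary] pairs, e.g. (hypothetical names)
    #   [["inst1.example.com", "node4.example.com"]]
    # while the iallocator branch returns the allocator's result unchanged.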
8145

    
8146

    
8147
class LUGrowDisk(LogicalUnit):
8148
  """Grow a disk of an instance.
8149

8150
  """
8151
  HPATH = "disk-grow"
8152
  HTYPE = constants.HTYPE_INSTANCE
8153
  _OP_REQP = [
8154
    ("instance_name", _TNEString),
8155
    ("disk", _TInt),
8156
    ("amount", _TInt),
8157
    ("wait_for_sync", _TBool),
8158
    ]
8159
  REQ_BGL = False
8160

    
8161
  def ExpandNames(self):
8162
    self._ExpandAndLockInstance()
8163
    self.needed_locks[locking.LEVEL_NODE] = []
8164
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8165

    
8166
  def DeclareLocks(self, level):
8167
    if level == locking.LEVEL_NODE:
8168
      self._LockInstancesNodes()
8169

    
8170
  def BuildHooksEnv(self):
8171
    """Build hooks env.
8172

8173
    This runs on the master, the primary and all the secondaries.
8174

8175
    """
8176
    env = {
8177
      "DISK": self.op.disk,
8178
      "AMOUNT": self.op.amount,
8179
      }
8180
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8181
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8182
    return env, nl, nl
8183

    
8184
  def CheckPrereq(self):
8185
    """Check prerequisites.
8186

8187
    This checks that the instance is in the cluster.
8188

8189
    """
8190
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8191
    assert instance is not None, \
8192
      "Cannot retrieve locked instance %s" % self.op.instance_name
8193
    nodenames = list(instance.all_nodes)
8194
    for node in nodenames:
8195
      _CheckNodeOnline(self, node)
8196

    
8197
    self.instance = instance
8198

    
8199
    if instance.disk_template not in constants.DTS_GROWABLE:
8200
      raise errors.OpPrereqError("Instance's disk layout does not support"
8201
                                 " growing.", errors.ECODE_INVAL)
8202

    
8203
    self.disk = instance.FindDisk(self.op.disk)
8204

    
8205
    if instance.disk_template != constants.DT_FILE:
8206
      # TODO: check the free disk space for file, when that feature will be
8207
      # supported
8208
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8209

    
8210
  def Exec(self, feedback_fn):
8211
    """Execute disk grow.
8212

8213
    """
8214
    instance = self.instance
8215
    disk = self.disk
8216

    
8217
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8218
    if not disks_ok:
8219
      raise errors.OpExecError("Cannot activate block device to grow")
8220

    
8221
    for node in instance.all_nodes:
8222
      self.cfg.SetDiskID(disk, node)
8223
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8224
      result.Raise("Grow request failed to node %s" % node)
8225

    
8226
      # TODO: Rewrite code to work properly
8227
      # DRBD goes into sync mode for a short amount of time after executing the
8228
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8229
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8230
      # time is a work-around.
8231
      time.sleep(5)
8232

    
8233
    disk.RecordGrow(self.op.amount)
8234
    self.cfg.Update(instance, feedback_fn)
8235
    if self.op.wait_for_sync:
8236
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8237
      if disk_abort:
8238
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8239
                             " status.\nPlease check the instance.")
8240
      if not instance.admin_up:
8241
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8242
    elif not instance.admin_up:
8243
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8244
                           " not supposed to be running because no wait for"
8245
                           " sync mode was requested.")
8246

    
8247

    
8248
class LUQueryInstanceData(NoHooksLU):
8249
  """Query runtime instance data.
8250

8251
  """
8252
  _OP_REQP = [
8253
    ("instances", _TListOf(_TNEString)),
8254
    ("static", _TBool),
8255
    ]
8256
  REQ_BGL = False
8257

    
8258
  def ExpandNames(self):
8259
    self.needed_locks = {}
8260
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8261

    
8262
    if self.op.instances:
8263
      self.wanted_names = []
8264
      for name in self.op.instances:
8265
        full_name = _ExpandInstanceName(self.cfg, name)
8266
        self.wanted_names.append(full_name)
8267
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8268
    else:
8269
      self.wanted_names = None
8270
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8271

    
8272
    self.needed_locks[locking.LEVEL_NODE] = []
8273
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8274

    
8275
  def DeclareLocks(self, level):
8276
    if level == locking.LEVEL_NODE:
8277
      self._LockInstancesNodes()
8278

    
8279
  def CheckPrereq(self):
8280
    """Check prerequisites.
8281

8282
    This only checks the optional instance list against the existing names.
8283

8284
    """
8285
    if self.wanted_names is None:
8286
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8287

    
8288
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8289
                             in self.wanted_names]
8290

    
8291
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8292
    """Returns the status of a block device
8293

8294
    """
8295
    if self.op.static or not node:
8296
      return None
8297

    
8298
    self.cfg.SetDiskID(dev, node)
8299

    
8300
    result = self.rpc.call_blockdev_find(node, dev)
8301
    if result.offline:
8302
      return None
8303

    
8304
    result.Raise("Can't compute disk status for %s" % instance_name)
8305

    
8306
    status = result.payload
8307
    if status is None:
8308
      return None
8309

    
8310
    return (status.dev_path, status.major, status.minor,
8311
            status.sync_percent, status.estimated_time,
8312
            status.is_degraded, status.ldisk_status)
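    # The returned tuple is (dev_path, major, minor, sync_percent,
    # estimated_time, is_degraded, ldisk_status), e.g. with hypothetical
    # values ("/dev/drbd0", 147, 0, 94.5, 120, False, None).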
8313

    
8314
  def _ComputeDiskStatus(self, instance, snode, dev):
8315
    """Compute block device status.
8316

8317
    """
8318
    if dev.dev_type in constants.LDS_DRBD:
8319
      # we change the snode then (otherwise we use the one passed in)
8320
      if dev.logical_id[0] == instance.primary_node:
8321
        snode = dev.logical_id[1]
8322
      else:
8323
        snode = dev.logical_id[0]
8324

    
8325
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8326
                                              instance.name, dev)
8327
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8328

    
8329
    if dev.children:
8330
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8331
                      for child in dev.children]
8332
    else:
8333
      dev_children = []
8334

    
8335
    data = {
8336
      "iv_name": dev.iv_name,
8337
      "dev_type": dev.dev_type,
8338
      "logical_id": dev.logical_id,
8339
      "physical_id": dev.physical_id,
8340
      "pstatus": dev_pstatus,
8341
      "sstatus": dev_sstatus,
8342
      "children": dev_children,
8343
      "mode": dev.mode,
8344
      "size": dev.size,
8345
      }
8346

    
8347
    return data
8348

    
8349
  def Exec(self, feedback_fn):
8350
    """Gather and return data"""
8351
    result = {}
8352

    
8353
    cluster = self.cfg.GetClusterInfo()
8354

    
8355
    for instance in self.wanted_instances:
8356
      if not self.op.static:
8357
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8358
                                                  instance.name,
8359
                                                  instance.hypervisor)
8360
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8361
        remote_info = remote_info.payload
8362
        if remote_info and "state" in remote_info:
8363
          remote_state = "up"
8364
        else:
8365
          remote_state = "down"
8366
      else:
8367
        remote_state = None
8368
      if instance.admin_up:
8369
        config_state = "up"
8370
      else:
8371
        config_state = "down"
8372

    
8373
      disks = [self._ComputeDiskStatus(instance, None, device)
8374
               for device in instance.disks]
8375

    
8376
      idict = {
8377
        "name": instance.name,
8378
        "config_state": config_state,
8379
        "run_state": remote_state,
8380
        "pnode": instance.primary_node,
8381
        "snodes": instance.secondary_nodes,
8382
        "os": instance.os,
8383
        # this happens to be the same format used for hooks
8384
        "nics": _NICListToTuple(self, instance.nics),
8385
        "disk_template": instance.disk_template,
8386
        "disks": disks,
8387
        "hypervisor": instance.hypervisor,
8388
        "network_port": instance.network_port,
8389
        "hv_instance": instance.hvparams,
8390
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8391
        "be_instance": instance.beparams,
8392
        "be_actual": cluster.FillBE(instance),
8393
        "os_instance": instance.osparams,
8394
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8395
        "serial_no": instance.serial_no,
8396
        "mtime": instance.mtime,
8397
        "ctime": instance.ctime,
8398
        "uuid": instance.uuid,
8399
        }
8400

    
8401
      result[instance.name] = idict
8402

    
8403
    return result
8404

    
8405

    
8406
class LUSetInstanceParams(LogicalUnit):
8407
  """Modifies an instances's parameters.
8408

8409
  """
8410
  HPATH = "instance-modify"
8411
  HTYPE = constants.HTYPE_INSTANCE
8412
  _OP_REQP = [("instance_name", _TNEString)]
8413
  _OP_DEFS = [
8414
    ("nics", _EmptyList),
8415
    ("disks", _EmptyList),
8416
    ("beparams", _EmptyDict),
8417
    ("hvparams", _EmptyDict),
8418
    ("disk_template", None),
8419
    ("remote_node", None),
8420
    ("os_name", None),
8421
    ("force_variant", False),
8422
    ("osparams", None),
8423
    ("force", False),
8424
    ]
8425
  REQ_BGL = False
8426

    
8427
  def CheckArguments(self):
8428
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8429
            self.op.hvparams or self.op.beparams or self.op.os_name):
8430
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8431

    
8432
    if self.op.hvparams:
8433
      _CheckGlobalHvParams(self.op.hvparams)
8434

    
8435
    # Disk validation
8436
    disk_addremove = 0
8437
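    # Each entry is an (operation, parameters) pair; the operation is
    # either DDM_ADD, DDM_REMOVE or the index of an existing disk to
    # modify.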
    for disk_op, disk_dict in self.op.disks:
8438
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8439
      if disk_op == constants.DDM_REMOVE:
8440
        disk_addremove += 1
8441
        continue
8442
      elif disk_op == constants.DDM_ADD:
8443
        disk_addremove += 1
8444
      else:
8445
        if not isinstance(disk_op, int):
8446
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8447
        if not isinstance(disk_dict, dict):
8448
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8449
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8450

    
8451
      if disk_op == constants.DDM_ADD:
8452
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8453
        if mode not in constants.DISK_ACCESS_SET:
8454
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8455
                                     errors.ECODE_INVAL)
8456
        size = disk_dict.get('size', None)
8457
        if size is None:
8458
          raise errors.OpPrereqError("Required disk parameter size missing",
8459
                                     errors.ECODE_INVAL)
8460
        try:
8461
          size = int(size)
8462
        except (TypeError, ValueError), err:
8463
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8464
                                     str(err), errors.ECODE_INVAL)
8465
        disk_dict['size'] = size
8466
      else:
8467
        # modification of disk
8468
        if 'size' in disk_dict:
8469
          raise errors.OpPrereqError("Disk size change not possible, use"
8470
                                     " grow-disk", errors.ECODE_INVAL)
8471

    
8472
    if disk_addremove > 1:
8473
      raise errors.OpPrereqError("Only one disk add or remove operation"
8474
                                 " supported at a time", errors.ECODE_INVAL)
8475

    
8476
    if self.op.disks and self.op.disk_template is not None:
8477
      raise errors.OpPrereqError("Disk template conversion and other disk"
8478
                                 " changes not supported at the same time",
8479
                                 errors.ECODE_INVAL)
8480

    
8481
    if self.op.disk_template:
8482
      _CheckDiskTemplate(self.op.disk_template)
8483
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8484
          self.op.remote_node is None):
8485
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8486
                                   " one requires specifying a secondary node",
8487
                                   errors.ECODE_INVAL)
8488

    
8489
    # NIC validation
8490
    nic_addremove = 0
8491
    for nic_op, nic_dict in self.op.nics:
8492
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8493
      if nic_op == constants.DDM_REMOVE:
8494
        nic_addremove += 1
8495
        continue
8496
      elif nic_op == constants.DDM_ADD:
8497
        nic_addremove += 1
8498
      else:
8499
        if not isinstance(nic_op, int):
8500
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8501
        if not isinstance(nic_dict, dict):
8502
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8503
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8504

    
8505
      # nic_dict should be a dict
8506
      nic_ip = nic_dict.get('ip', None)
8507
      if nic_ip is not None:
8508
        if nic_ip.lower() == constants.VALUE_NONE:
8509
          nic_dict['ip'] = None
8510
        else:
8511
          if not utils.IsValidIP(nic_ip):
8512
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8513
                                       errors.ECODE_INVAL)
8514

    
8515
      nic_bridge = nic_dict.get('bridge', None)
8516
      nic_link = nic_dict.get('link', None)
8517
      if nic_bridge and nic_link:
8518
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8519
                                   " at the same time", errors.ECODE_INVAL)
8520
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8521
        nic_dict['bridge'] = None
8522
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8523
        nic_dict['link'] = None
8524

    
8525
      if nic_op == constants.DDM_ADD:
8526
        nic_mac = nic_dict.get('mac', None)
8527
        if nic_mac is None:
8528
          nic_dict['mac'] = constants.VALUE_AUTO
8529

    
8530
      if 'mac' in nic_dict:
8531
        nic_mac = nic_dict['mac']
8532
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8533
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8534

    
8535
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8536
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8537
                                     " modifying an existing nic",
8538
                                     errors.ECODE_INVAL)
8539

    
8540
    if nic_addremove > 1:
8541
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8542
                                 " supported at a time", errors.ECODE_INVAL)
8543

    
8544
  def ExpandNames(self):
8545
    self._ExpandAndLockInstance()
8546
    self.needed_locks[locking.LEVEL_NODE] = []
8547
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8548

    
8549
  def DeclareLocks(self, level):
8550
    if level == locking.LEVEL_NODE:
8551
      self._LockInstancesNodes()
8552
      if self.op.disk_template and self.op.remote_node:
8553
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8554
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8555

    
8556
  def BuildHooksEnv(self):
8557
    """Build hooks env.
8558

8559
    This runs on the master, primary and secondaries.
8560

8561
    """
8562
    args = dict()
8563
    if constants.BE_MEMORY in self.be_new:
8564
      args['memory'] = self.be_new[constants.BE_MEMORY]
8565
    if constants.BE_VCPUS in self.be_new:
8566
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8567
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8568
    # information at all.
8569
    if self.op.nics:
8570
      args['nics'] = []
8571
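      # Build the NIC list as it will look after this modification, so the
      # hooks environment reflects the post-change configuration.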
      nic_override = dict(self.op.nics)
8572
      for idx, nic in enumerate(self.instance.nics):
8573
        if idx in nic_override:
8574
          this_nic_override = nic_override[idx]
8575
        else:
8576
          this_nic_override = {}
8577
        if 'ip' in this_nic_override:
8578
          ip = this_nic_override['ip']
8579
        else:
8580
          ip = nic.ip
8581
        if 'mac' in this_nic_override:
8582
          mac = this_nic_override['mac']
8583
        else:
8584
          mac = nic.mac
8585
        if idx in self.nic_pnew:
8586
          nicparams = self.nic_pnew[idx]
8587
        else:
8588
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8589
        mode = nicparams[constants.NIC_MODE]
8590
        link = nicparams[constants.NIC_LINK]
8591
        args['nics'].append((ip, mac, mode, link))
8592
      if constants.DDM_ADD in nic_override:
8593
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8594
        mac = nic_override[constants.DDM_ADD]['mac']
8595
        nicparams = self.nic_pnew[constants.DDM_ADD]
8596
        mode = nicparams[constants.NIC_MODE]
8597
        link = nicparams[constants.NIC_LINK]
8598
        args['nics'].append((ip, mac, mode, link))
8599
      elif constants.DDM_REMOVE in nic_override:
8600
        del args['nics'][-1]
8601

    
8602
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8603
    if self.op.disk_template:
8604
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8605
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8606
    return env, nl, nl
8607

    
8608
  def CheckPrereq(self):
8609
    """Check prerequisites.
8610

8611
    This checks the requested changes (disks, NICs, hypervisor, backend
    and OS parameters) against the instance and cluster configuration.
8612

8613
    """
8614
    # checking the new params on the primary/secondary nodes
8615

    
8616
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8617
    cluster = self.cluster = self.cfg.GetClusterInfo()
8618
    assert self.instance is not None, \
8619
      "Cannot retrieve locked instance %s" % self.op.instance_name
8620
    pnode = instance.primary_node
8621
    nodelist = list(instance.all_nodes)
8622

    
8623
    # OS change
8624
    if self.op.os_name and not self.op.force:
8625
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8626
                      self.op.force_variant)
8627
      instance_os = self.op.os_name
8628
    else:
8629
      instance_os = instance.os
8630

    
8631
    if self.op.disk_template:
8632
      if instance.disk_template == self.op.disk_template:
8633
        raise errors.OpPrereqError("Instance already has disk template %s" %
8634
                                   instance.disk_template, errors.ECODE_INVAL)
8635

    
8636
      if (instance.disk_template,
8637
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8638
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8639
                                   " %s to %s" % (instance.disk_template,
8640
                                                  self.op.disk_template),
8641
                                   errors.ECODE_INVAL)
8642
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8643
        _CheckNodeOnline(self, self.op.remote_node)
8644
        _CheckNodeNotDrained(self, self.op.remote_node)
8645
        disks = [{"size": d.size} for d in instance.disks]
8646
        required = _ComputeDiskSize(self.op.disk_template, disks)
8647
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8648
        _CheckInstanceDown(self, instance, "cannot change disk template")
8649

    
8650
    # hvparams processing
8651
    if self.op.hvparams:
8652
      hv_type = instance.hypervisor
8653
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8654
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8655
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8656

    
8657
      # local check
8658
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8659
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8660
      self.hv_new = hv_new # the new actual values
8661
      self.hv_inst = i_hvdict # the new dict (without defaults)
8662
    else:
8663
      self.hv_new = self.hv_inst = {}
8664

    
8665
    # beparams processing
8666
    if self.op.beparams:
8667
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8668
                                   use_none=True)
8669
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8670
      be_new = cluster.SimpleFillBE(i_bedict)
8671
      self.be_new = be_new # the new actual values
8672
      self.be_inst = i_bedict # the new dict (without defaults)
8673
    else:
8674
      self.be_new = self.be_inst = {}
8675

    
8676
    # osparams processing
8677
    if self.op.osparams:
8678
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8679
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8680
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8681
      self.os_inst = i_osdict # the new dict (without defaults)
8682
    else:
8683
      self.os_new = self.os_inst = {}
8684

    
8685
    self.warn = []
8686

    
8687
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8688
      mem_check_list = [pnode]
8689
      if be_new[constants.BE_AUTO_BALANCE]:
8690
        # either we changed auto_balance to yes or it was from before
8691
        mem_check_list.extend(instance.secondary_nodes)
8692
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8693
                                                  instance.hypervisor)
8694
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8695
                                         instance.hypervisor)
8696
      pninfo = nodeinfo[pnode]
8697
      msg = pninfo.fail_msg
8698
      if msg:
8699
        # Assume the primary node is unreachable and go ahead
8700
        self.warn.append("Can't get info from primary node %s: %s" %
8701
                         (pnode,  msg))
8702
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8703
        self.warn.append("Node data from primary node %s doesn't contain"
8704
                         " free memory information" % pnode)
8705
      elif instance_info.fail_msg:
8706
        self.warn.append("Can't get instance runtime information: %s" %
8707
                        instance_info.fail_msg)
8708
      else:
8709
        if instance_info.payload:
8710
          current_mem = int(instance_info.payload['memory'])
8711
        else:
8712
          # Assume instance not running
8713
          # (there is a slight race condition here, but it's not very probable,
8714
          # and we have no other way to check)
8715
          current_mem = 0
8716
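        # The memory currently used by the instance is already accounted
        # for on its node, so only the increase over current_mem has to fit
        # into the node's reported free memory.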
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8717
                    pninfo.payload['memory_free'])
8718
        if miss_mem > 0:
8719
          raise errors.OpPrereqError("This change will prevent the instance"
8720
                                     " from starting, due to %d MB of memory"
8721
                                     " missing on its primary node" % miss_mem,
8722
                                     errors.ECODE_NORES)
8723

    
8724
      if be_new[constants.BE_AUTO_BALANCE]:
8725
        for node, nres in nodeinfo.items():
8726
          if node not in instance.secondary_nodes:
8727
            continue
8728
          msg = nres.fail_msg
8729
          if msg:
8730
            self.warn.append("Can't get info from secondary node %s: %s" %
8731
                             (node, msg))
8732
          elif not isinstance(nres.payload.get('memory_free', None), int):
8733
            self.warn.append("Secondary node %s didn't return free"
8734
                             " memory information" % node)
8735
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8736
            self.warn.append("Not enough memory to failover instance to"
8737
                             " secondary node %s" % node)
8738

    
8739
    # NIC processing
8740
    self.nic_pnew = {}
8741
    self.nic_pinst = {}
8742
    for nic_op, nic_dict in self.op.nics:
8743
      if nic_op == constants.DDM_REMOVE:
8744
        if not instance.nics:
8745
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8746
                                     errors.ECODE_INVAL)
8747
        continue
8748
      if nic_op != constants.DDM_ADD:
8749
        # an existing nic
8750
        if not instance.nics:
8751
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8752
                                     " no NICs" % nic_op,
8753
                                     errors.ECODE_INVAL)
8754
        if nic_op < 0 or nic_op >= len(instance.nics):
8755
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8756
                                     " are 0 to %d" %
8757
                                     (nic_op, len(instance.nics) - 1),
8758
                                     errors.ECODE_INVAL)
8759
        old_nic_params = instance.nics[nic_op].nicparams
8760
        old_nic_ip = instance.nics[nic_op].ip
8761
      else:
8762
        old_nic_params = {}
8763
        old_nic_ip = None
8764

    
8765
      update_params_dict = dict([(key, nic_dict[key])
8766
                                 for key in constants.NICS_PARAMETERS
8767
                                 if key in nic_dict])
8768

    
8769
      if 'bridge' in nic_dict:
8770
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8771

    
8772
      new_nic_params = _GetUpdatedParams(old_nic_params,
8773
                                         update_params_dict)
8774
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8775
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8776
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8777
      self.nic_pinst[nic_op] = new_nic_params
8778
      self.nic_pnew[nic_op] = new_filled_nic_params
8779
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8780

    
8781
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8782
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8783
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8784
        if msg:
8785
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8786
          if self.op.force:
8787
            self.warn.append(msg)
8788
          else:
8789
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8790
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8791
        if 'ip' in nic_dict:
8792
          nic_ip = nic_dict['ip']
8793
        else:
8794
          nic_ip = old_nic_ip
8795
        if nic_ip is None:
8796
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8797
                                     ' on a routed nic', errors.ECODE_INVAL)
8798
      if 'mac' in nic_dict:
8799
        nic_mac = nic_dict['mac']
8800
        if nic_mac is None:
8801
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8802
                                     errors.ECODE_INVAL)
8803
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8804
          # otherwise generate the mac
8805
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8806
        else:
8807
          # or validate/reserve the current one
8808
          try:
8809
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8810
          except errors.ReservationError:
8811
            raise errors.OpPrereqError("MAC address %s already in use"
8812
                                       " in cluster" % nic_mac,
8813
                                       errors.ECODE_NOTUNIQUE)
8814

    
8815
    # DISK processing
8816
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8817
      raise errors.OpPrereqError("Disk operations not supported for"
8818
                                 " diskless instances",
8819
                                 errors.ECODE_INVAL)
8820
    for disk_op, _ in self.op.disks:
8821
      if disk_op == constants.DDM_REMOVE:
8822
        if len(instance.disks) == 1:
8823
          raise errors.OpPrereqError("Cannot remove the last disk of"
8824
                                     " an instance", errors.ECODE_INVAL)
8825
        _CheckInstanceDown(self, instance, "cannot remove disks")
8826

    
8827
      if (disk_op == constants.DDM_ADD and
8828
          len(instance.disks) >= constants.MAX_DISKS):
8829
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8830
                                   " add more" % constants.MAX_DISKS,
8831
                                   errors.ECODE_STATE)
8832
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8833
        # an existing disk
8834
        if disk_op < 0 or disk_op >= len(instance.disks):
8835
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8836
                                     " are 0 to %d" %
8837
                                     (disk_op, len(instance.disks) - 1),
8838
                                     errors.ECODE_INVAL)
8839

    
8840
    return
8841

    
8842
  def _ConvertPlainToDrbd(self, feedback_fn):
8843
    """Converts an instance from plain to drbd.
8844

8845
    """
8846
    feedback_fn("Converting template to drbd")
8847
    instance = self.instance
8848
    pnode = instance.primary_node
8849
    snode = self.op.remote_node
8850

    
8851
    # create a fake disk info for _GenerateDiskTemplate
8852
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8853
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8854
                                      instance.name, pnode, [snode],
8855
                                      disk_info, None, None, 0)
8856
    info = _GetInstanceInfoText(instance)
8857
    feedback_fn("Creating aditional volumes...")
8858
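    # Each entry in new_disks is a DRBD8 device whose children are the
    # data LV and the metadata LV; the data LV takes over the existing
    # plain LV via the rename below.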
    # first, create the missing data and meta devices
8859
    for disk in new_disks:
8860
      # unfortunately this is... not too nice
8861
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8862
                            info, True)
8863
      for child in disk.children:
8864
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8865
    # at this stage, all new LVs have been created, we can rename the
8866
    # old ones
8867
    feedback_fn("Renaming original volumes...")
8868
    rename_list = [(o, n.children[0].logical_id)
8869
                   for (o, n) in zip(instance.disks, new_disks)]
8870
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8871
    result.Raise("Failed to rename original LVs")
8872

    
8873
    feedback_fn("Initializing DRBD devices...")
8874
    # all child devices are in place, we can now create the DRBD devices
8875
    for disk in new_disks:
8876
      for node in [pnode, snode]:
8877
        f_create = node == pnode
8878
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8879

    
8880
    # at this point, the instance has been modified
8881
    instance.disk_template = constants.DT_DRBD8
8882
    instance.disks = new_disks
8883
    self.cfg.Update(instance, feedback_fn)
8884

    
8885
    # disks are created, waiting for sync
8886
    disk_abort = not _WaitForSync(self, instance)
8887
    if disk_abort:
8888
      raise errors.OpExecError("There are some degraded disks for"
8889
                               " this instance, please cleanup manually")
8890

    
8891
  def _ConvertDrbdToPlain(self, feedback_fn):
8892
    """Converts an instance from drbd to plain.
8893

8894
    """
8895
    instance = self.instance
8896
    assert len(instance.secondary_nodes) == 1
8897
    pnode = instance.primary_node
8898
    snode = instance.secondary_nodes[0]
8899
    feedback_fn("Converting template to plain")
8900

    
8901
    old_disks = instance.disks
8902
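    # The data LV (first child) of each DRBD disk becomes the new plain
    # disk; the metadata LVs are removed further down.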
    new_disks = [d.children[0] for d in old_disks]
8903

    
8904
    # copy over size and mode
8905
    for parent, child in zip(old_disks, new_disks):
8906
      child.size = parent.size
8907
      child.mode = parent.mode
8908

    
8909
    # update instance structure
8910
    instance.disks = new_disks
8911
    instance.disk_template = constants.DT_PLAIN
8912
    self.cfg.Update(instance, feedback_fn)
8913

    
8914
    feedback_fn("Removing volumes on the secondary node...")
8915
    for disk in old_disks:
8916
      self.cfg.SetDiskID(disk, snode)
8917
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8918
      if msg:
8919
        self.LogWarning("Could not remove block device %s on node %s,"
8920
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8921

    
8922
    feedback_fn("Removing unneeded volumes on the primary node...")
8923
    for idx, disk in enumerate(old_disks):
8924
      meta = disk.children[1]
8925
      self.cfg.SetDiskID(meta, pnode)
8926
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8927
      if msg:
8928
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8929
                        " continuing anyway: %s", idx, pnode, msg)
8930

    
8931

    
8932
  def Exec(self, feedback_fn):
8933
    """Modifies an instance.
8934

8935
    All parameters take effect only at the next restart of the instance.
8936

8937
    """
8938
    # Process here the warnings from CheckPrereq, as we don't have a
8939
    # feedback_fn there.
8940
    for warn in self.warn:
8941
      feedback_fn("WARNING: %s" % warn)
8942

    
8943
    result = []
8944
    instance = self.instance
8945
    # disk changes
8946
    for disk_op, disk_dict in self.op.disks:
8947
      if disk_op == constants.DDM_REMOVE:
8948
        # remove the last disk
8949
        device = instance.disks.pop()
8950
        device_idx = len(instance.disks)
8951
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8952
          self.cfg.SetDiskID(disk, node)
8953
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8954
          if msg:
8955
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8956
                            " continuing anyway", device_idx, node, msg)
8957
        result.append(("disk/%d" % device_idx, "remove"))
8958
      elif disk_op == constants.DDM_ADD:
8959
        # add a new disk
8960
        if instance.disk_template == constants.DT_FILE:
8961
          file_driver, file_path = instance.disks[0].logical_id
8962
          file_path = os.path.dirname(file_path)
8963
        else:
8964
          file_driver = file_path = None
8965
        disk_idx_base = len(instance.disks)
8966
        new_disk = _GenerateDiskTemplate(self,
8967
                                         instance.disk_template,
8968
                                         instance.name, instance.primary_node,
8969
                                         instance.secondary_nodes,
8970
                                         [disk_dict],
8971
                                         file_path,
8972
                                         file_driver,
8973
                                         disk_idx_base)[0]
8974
        instance.disks.append(new_disk)
8975
        info = _GetInstanceInfoText(instance)
8976

    
8977
        logging.info("Creating volume %s for instance %s",
8978
                     new_disk.iv_name, instance.name)
8979
        # Note: this needs to be kept in sync with _CreateDisks
8980
        #HARDCODE
8981
        for node in instance.all_nodes:
8982
          f_create = node == instance.primary_node
8983
          try:
8984
            _CreateBlockDev(self, node, instance, new_disk,
8985
                            f_create, info, f_create)
8986
          except errors.OpExecError, err:
8987
            self.LogWarning("Failed to create volume %s (%s) on"
8988
                            " node %s: %s",
8989
                            new_disk.iv_name, new_disk, node, err)
8990
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8991
                       (new_disk.size, new_disk.mode)))
8992
      else:
8993
        # change a given disk
8994
        instance.disks[disk_op].mode = disk_dict['mode']
8995
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8996

    
8997
    if self.op.disk_template:
8998
      r_shut = _ShutdownInstanceDisks(self, instance)
8999
      if not r_shut:
9000
        raise errors.OpExecError("Cannot shut down instance disks, unable to"
9001
                                 " proceed with disk template conversion")
9002
      mode = (instance.disk_template, self.op.disk_template)
9003
      try:
9004
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9005
      except:
9006
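        # Give back any DRBD minors reserved for the failed conversion so
        # they can be reused by later operations.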
        self.cfg.ReleaseDRBDMinors(instance.name)
9007
        raise
9008
      result.append(("disk_template", self.op.disk_template))
9009

    
9010
    # NIC changes
9011
    for nic_op, nic_dict in self.op.nics:
9012
      if nic_op == constants.DDM_REMOVE:
9013
        # remove the last nic
9014
        del instance.nics[-1]
9015
        result.append(("nic.%d" % len(instance.nics), "remove"))
9016
      elif nic_op == constants.DDM_ADD:
9017
        # mac and bridge should be set, by now
9018
        mac = nic_dict['mac']
9019
        ip = nic_dict.get('ip', None)
9020
        nicparams = self.nic_pinst[constants.DDM_ADD]
9021
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9022
        instance.nics.append(new_nic)
9023
        result.append(("nic.%d" % (len(instance.nics) - 1),
9024
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9025
                       (new_nic.mac, new_nic.ip,
9026
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9027
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9028
                       )))
9029
      else:
9030
        for key in 'mac', 'ip':
9031
          if key in nic_dict:
9032
            setattr(instance.nics[nic_op], key, nic_dict[key])
9033
        if nic_op in self.nic_pinst:
9034
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9035
        for key, val in nic_dict.iteritems():
9036
          result.append(("nic.%s/%d" % (key, nic_op), val))
9037

    
9038
    # hvparams changes
9039
    if self.op.hvparams:
9040
      instance.hvparams = self.hv_inst
9041
      for key, val in self.op.hvparams.iteritems():
9042
        result.append(("hv/%s" % key, val))
9043

    
9044
    # beparams changes
9045
    if self.op.beparams:
9046
      instance.beparams = self.be_inst
9047
      for key, val in self.op.beparams.iteritems():
9048
        result.append(("be/%s" % key, val))
9049

    
9050
    # OS change
9051
    if self.op.os_name:
9052
      instance.os = self.op.os_name
9053

    
9054
    # osparams changes
9055
    if self.op.osparams:
9056
      instance.osparams = self.os_inst
9057
      for key, val in self.op.osparams.iteritems():
9058
        result.append(("os/%s" % key, val))
9059

    
9060
    self.cfg.Update(instance, feedback_fn)
9061

    
9062
    return result
9063

    
9064
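  # Supported (current template, requested template) pairs, mapped to the
  # conversion helpers above; CheckPrereq rejects any other combination.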
  _DISK_CONVERSIONS = {
9065
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9066
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9067
    }
9068

    
9069

    
9070
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = [("nodes", _TListOf(_TNEString))]
  REQ_BGL = False
9076

    
9077
  def ExpandNames(self):
9078
    self.needed_locks = {}
9079
    self.share_locks[locking.LEVEL_NODE] = 1
9080
    if not self.op.nodes:
9081
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9082
    else:
9083
      self.needed_locks[locking.LEVEL_NODE] = \
9084
        _GetWantedNodes(self, self.op.nodes)
9085

    
9086
  def Exec(self, feedback_fn):
9087
    """Compute the list of all the exported system images.
9088

9089
    @rtype: dict
9090
    @return: a dictionary with the structure node->(export-list)
9091
        where export-list is a list of the instances exported on
9092
        that node.
9093

9094
    """
9095
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9096
    rpcresult = self.rpc.call_export_list(self.nodes)
9097
    result = {}
9098
    for node in rpcresult:
9099
      if rpcresult[node].fail_msg:
9100
        result[node] = False
9101
      else:
9102
        result[node] = rpcresult[node].payload
9103

    
9104
    return result
9105

    
9106

    
9107
class LUPrepareExport(NoHooksLU):
9108
  """Prepares an instance for an export and returns useful information.
9109

9110
  """
9111
  _OP_REQP = [
9112
    ("instance_name", _TNEString),
9113
    ("mode", _TElemOf(constants.EXPORT_MODES)),
9114
    ]
9115
  REQ_BGL = False
9116

    
9117
  def ExpandNames(self):
9118
    self._ExpandAndLockInstance()
9119

    
9120
  def CheckPrereq(self):
9121
    """Check prerequisites.
9122

9123
    """
9124
    instance_name = self.op.instance_name
9125

    
9126
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9127
    assert self.instance is not None, \
9128
          "Cannot retrieve locked instance %s" % self.op.instance_name
9129
    _CheckNodeOnline(self, self.instance.primary_node)
9130

    
9131
    self._cds = _GetClusterDomainSecret()
9132

    
9133
  def Exec(self, feedback_fn):
9134
    """Prepares an instance for an export.
9135

9136
    """
9137
    instance = self.instance
9138

    
9139
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9140
      salt = utils.GenerateSecret(8)
9141

    
9142
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9143
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9144
                                              constants.RIE_CERT_VALIDITY)
9145
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9146

    
9147
      (name, cert_pem) = result.payload
9148

    
9149
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9150
                                             cert_pem)
9151

    
9152
      return {
9153
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9154
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9155
                          salt),
9156
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9157
        }
9158

    
9159
    return None
9160

    
9161

    
9162
class LUExportInstance(LogicalUnit):
9163
  """Export an instance to an image in the cluster.
9164

9165
  """
9166
  HPATH = "instance-export"
9167
  HTYPE = constants.HTYPE_INSTANCE
9168
  _OP_REQP = [
9169
    ("instance_name", _TNEString),
9170
    ("target_node", _TNEString),
9171
    ("shutdown", _TBool),
9172
    ("mode", _TElemOf(constants.EXPORT_MODES)),
9173
    ]
9174
  _OP_DEFS = [
9175
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9176
    ("remove_instance", False),
9177
    ("ignore_remove_failures", False),
9178
    ("mode", constants.EXPORT_MODE_LOCAL),
9179
    ("x509_key_name", None),
9180
    ("destination_x509_ca", None),
9181
    ]
9182
  REQ_BGL = False
9183

    
9184
  def CheckArguments(self):
9185
    """Check the arguments.
9186

9187
    """
9188
    self.x509_key_name = self.op.x509_key_name
9189
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9190

    
9191
    if self.op.remove_instance and not self.op.shutdown:
9192
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first", errors.ECODE_INVAL)
9194

    
9195
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9196
      if not self.x509_key_name:
9197
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9198
                                   errors.ECODE_INVAL)
9199

    
9200
      if not self.dest_x509_ca_pem:
9201
        raise errors.OpPrereqError("Missing destination X509 CA",
9202
                                   errors.ECODE_INVAL)
9203

    
9204
  def ExpandNames(self):
9205
    self._ExpandAndLockInstance()
9206

    
9207
    # Lock all nodes for local exports
9208
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9209
      # FIXME: lock only instance primary and destination node
9210
      #
9211
      # Sad but true, for now we have to lock all nodes, as we don't know where
9212
      # the previous export might be, and in this LU we search for it and
9213
      # remove it from its current node. In the future we could fix this by:
9214
      #  - making a tasklet to search (share-lock all), then create the
9215
      #    new one, then one to remove, after
9216
      #  - removing the removal operation altogether
9217
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9218

    
9219
  def DeclareLocks(self, level):
9220
    """Last minute lock declaration."""
9221
    # All nodes are locked anyway, so nothing to do here.
9222

    
9223
  def BuildHooksEnv(self):
9224
    """Build hooks env.
9225

9226
    This will run on the master, primary node and target node.
9227

9228
    """
9229
    env = {
9230
      "EXPORT_MODE": self.op.mode,
9231
      "EXPORT_NODE": self.op.target_node,
9232
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9233
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9234
      # TODO: Generic function for boolean env variables
9235
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9236
      }
9237

    
9238
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9239

    
9240
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9241

    
9242
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9243
      nl.append(self.op.target_node)
9244

    
9245
    return env, nl, nl
9246

    
9247
  def CheckPrereq(self):
9248
    """Check prerequisites.
9249

9250
    This checks that the instance and node names are valid.
9251

9252
    """
9253
    instance_name = self.op.instance_name
9254

    
9255
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9256
    assert self.instance is not None, \
9257
          "Cannot retrieve locked instance %s" % self.op.instance_name
9258
    _CheckNodeOnline(self, self.instance.primary_node)
9259

    
9260
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9261
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9262
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9263
      assert self.dst_node is not None
9264

    
9265
      _CheckNodeOnline(self, self.dst_node.name)
9266
      _CheckNodeNotDrained(self, self.dst_node.name)
9267

    
9268
      self._cds = None
9269
      self.dest_disk_info = None
9270
      self.dest_x509_ca = None
9271

    
9272
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9273
      self.dst_node = None
9274

    
9275
      if len(self.op.target_node) != len(self.instance.disks):
9276
        raise errors.OpPrereqError(("Received destination information for %s"
9277
                                    " disks, but instance %s has %s disks") %
9278
                                   (len(self.op.target_node), instance_name,
9279
                                    len(self.instance.disks)),
9280
                                   errors.ECODE_INVAL)
9281

    
9282
      cds = _GetClusterDomainSecret()
9283

    
9284
      # Check X509 key name
9285
      try:
9286
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9287
      except (TypeError, ValueError), err:
9288
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)
9289

    
9290
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9291
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9292
                                   errors.ECODE_INVAL)
9293

    
9294
      # Load and verify CA
9295
      try:
9296
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9297
      except OpenSSL.crypto.Error, err:
9298
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9299
                                   (err, ), errors.ECODE_INVAL)
9300

    
9301
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9302
      if errcode is not None:
9303
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9304
                                   (msg, ), errors.ECODE_INVAL)
9305

    
9306
      self.dest_x509_ca = cert
9307

    
9308
      # Verify target information
9309
      disk_info = []
9310
      for idx, disk_data in enumerate(self.op.target_node):
9311
        try:
9312
          (host, port, magic) = \
9313
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9314
        except errors.GenericError, err:
9315
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9316
                                     (idx, err), errors.ECODE_INVAL)
9317

    
9318
        disk_info.append((host, port, magic))
9319

    
9320
      assert len(disk_info) == len(self.op.target_node)
9321
      self.dest_disk_info = disk_info
9322

    
9323
    else:
9324
      raise errors.ProgrammerError("Unhandled export mode %r" %
9325
                                   self.op.mode)
9326

    
9327
    # instance disk type verification
9328
    # TODO: Implement export support for file-based disks
9329
    for disk in self.instance.disks:
9330
      if disk.dev_type == constants.LD_FILE:
9331
        raise errors.OpPrereqError("Export not supported for instances with"
9332
                                   " file-based disks", errors.ECODE_INVAL)
9333

    
9334
  def _CleanupExports(self, feedback_fn):
9335
    """Removes exports of current instance from all other nodes.
9336

9337
    If an instance in a cluster with nodes A..D was exported to node C, its
9338
    exports will be removed from the nodes A, B and D.
9339

9340
    """
9341
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9342

    
9343
    nodelist = self.cfg.GetNodeList()
9344
    nodelist.remove(self.dst_node.name)
9345

    
9346
    # on one-node clusters nodelist will be empty after the removal
9347
    # if we proceed the backup would be removed because OpQueryExports
9348
    # substitutes an empty list with the full cluster node list.
9349
    iname = self.instance.name
9350
    if nodelist:
9351
      feedback_fn("Removing old exports for instance %s" % iname)
9352
      exportlist = self.rpc.call_export_list(nodelist)
9353
      for node in exportlist:
9354
        if exportlist[node].fail_msg:
9355
          continue
9356
        if iname in exportlist[node].payload:
9357
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9358
          if msg:
9359
            self.LogWarning("Could not remove older export for instance %s"
9360
                            " on node %s: %s", iname, node, msg)
9361

    
9362
  def Exec(self, feedback_fn):
9363
    """Export an instance to an image in the cluster.
9364

9365
    """
9366
    assert self.op.mode in constants.EXPORT_MODES
9367

    
9368
    instance = self.instance
9369
    src_node = instance.primary_node
9370

    
9371
    if self.op.shutdown:
9372
      # shutdown the instance, but not the disks
9373
      feedback_fn("Shutting down instance %s" % instance.name)
9374
      result = self.rpc.call_instance_shutdown(src_node, instance,
9375
                                               self.op.shutdown_timeout)
9376
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9377
      result.Raise("Could not shutdown instance %s on"
9378
                   " node %s" % (instance.name, src_node))
9379

    
9380
    # set the disks ID correctly since call_instance_start needs the
9381
    # correct drbd minor to create the symlinks
9382
    for disk in instance.disks:
9383
      self.cfg.SetDiskID(disk, src_node)
9384

    
9385
    activate_disks = (not instance.admin_up)
9386

    
9387
    if activate_disks:
9388
      # Activate the instance disks if we're exporting a stopped instance
9389
      feedback_fn("Activating disks for %s" % instance.name)
9390
      _StartInstanceDisks(self, instance, None)
9391

    
9392
    try:
9393
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9394
                                                     instance)
9395

    
9396
      helper.CreateSnapshots()
9397
      try:
9398
        if (self.op.shutdown and instance.admin_up and
9399
            not self.op.remove_instance):
9400
          assert not activate_disks
9401
          feedback_fn("Starting instance %s" % instance.name)
9402
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9403
          msg = result.fail_msg
9404
          if msg:
9405
            feedback_fn("Failed to start instance: %s" % msg)
9406
            _ShutdownInstanceDisks(self, instance)
9407
            raise errors.OpExecError("Could not start instance: %s" % msg)
9408

    
9409
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9410
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9411
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9412
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9413
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9414

    
9415
          (key_name, _, _) = self.x509_key_name
9416

    
9417
          dest_ca_pem = \
9418
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9419
                                            self.dest_x509_ca)
9420

    
9421
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9422
                                                     key_name, dest_ca_pem,
9423
                                                     timeouts)
9424
      finally:
9425
        helper.Cleanup()
9426

    
9427
      # Check for backwards compatibility
9428
      assert len(dresults) == len(instance.disks)
9429
      assert compat.all(isinstance(i, bool) for i in dresults), \
9430
             "Not all results are boolean: %r" % dresults
9431

    
9432
    finally:
9433
      if activate_disks:
9434
        feedback_fn("Deactivating disks for %s" % instance.name)
9435
        _ShutdownInstanceDisks(self, instance)
9436

    
9437
    # Remove instance if requested
9438
    if self.op.remove_instance:
9439
      if not (compat.all(dresults) and fin_resu):
9440
        feedback_fn("Not removing instance %s as parts of the export failed" %
9441
                    instance.name)
9442
      else:
9443
        feedback_fn("Removing instance %s" % instance.name)
9444
        _RemoveInstance(self, feedback_fn, instance,
9445
                        self.op.ignore_remove_failures)
9446

    
9447
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9448
      self._CleanupExports(feedback_fn)
9449

    
9450
    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
9454
  """Remove exports related to the named instance.
9455

9456
  """
9457
  _OP_REQP = [("instance_name", _TNEString)]
9458
  REQ_BGL = False
9459

    
9460
  def ExpandNames(self):
9461
    self.needed_locks = {}
9462
    # We need all nodes to be locked in order for RemoveExport to work, but we
9463
    # don't need to lock the instance itself, as nothing will happen to it (and
9464
    # we can remove exports also for a removed instance)
9465
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9466

    
9467
  def Exec(self, feedback_fn):
9468
    """Remove any export.
9469

9470
    """
9471
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9472
    # If the instance was not found we'll try with the name that was passed in.
9473
    # This will only work if it was an FQDN, though.
9474
    fqdn_warn = False
9475
    if not instance_name:
9476
      fqdn_warn = True
9477
      instance_name = self.op.instance_name
9478

    
9479
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9480
    exportlist = self.rpc.call_export_list(locked_nodes)
9481
    found = False
9482
    for node in exportlist:
9483
      msg = exportlist[node].fail_msg
9484
      if msg:
9485
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9486
        continue
9487
      if instance_name in exportlist[node].payload:
9488
        found = True
9489
        result = self.rpc.call_export_remove(node, instance_name)
9490
        msg = result.fail_msg
9491
        if msg:
9492
          logging.error("Could not remove export for instance %s"
9493
                        " on node %s: %s", instance_name, node, msg)
9494

    
9495
    if fqdn_warn and not found:
9496
      feedback_fn("Export not found. If trying to remove an export belonging"
9497
                  " to a deleted instance please use its Fully Qualified"
9498
                  " Domain Name.")
9499

    
9500

    
9501
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9502
  """Generic tags LU.
9503

9504
  This is an abstract class which is the parent of all the other tags LUs.
9505

9506
  """
9507

    
9508
  def ExpandNames(self):
9509
    self.needed_locks = {}
9510
    if self.op.kind == constants.TAG_NODE:
9511
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9512
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9513
    elif self.op.kind == constants.TAG_INSTANCE:
9514
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9515
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9516

    
9517
  def CheckPrereq(self):
9518
    """Check prerequisites.
9519

9520
    """
9521
    if self.op.kind == constants.TAG_CLUSTER:
9522
      self.target = self.cfg.GetClusterInfo()
9523
    elif self.op.kind == constants.TAG_NODE:
9524
      self.target = self.cfg.GetNodeInfo(self.op.name)
9525
    elif self.op.kind == constants.TAG_INSTANCE:
9526
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9527
    else:
9528
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9529
                                 str(self.op.kind), errors.ECODE_INVAL)
9530

    
9531

    
9532
class LUGetTags(TagsLU):
9533
  """Returns the tags of a given object.
9534

9535
  """
9536
  _OP_REQP = [
9537
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9538
    ("name", _TNEString),
9539
    ]
9540
  REQ_BGL = False
9541

    
9542
  def Exec(self, feedback_fn):
9543
    """Returns the tag list.
9544

9545
    """
9546
    return list(self.target.GetTags())
9547

    
9548

    
9549
class LUSearchTags(NoHooksLU):
9550
  """Searches the tags for a given pattern.
9551

9552
  """
9553
  _OP_REQP = [("pattern", _TNEString)]
9554
  REQ_BGL = False
9555

    
9556
  def ExpandNames(self):
9557
    self.needed_locks = {}
9558

    
9559
  def CheckPrereq(self):
9560
    """Check prerequisites.
9561

9562
    This checks the pattern passed for validity by compiling it.
9563

9564
    """
9565
    try:
9566
      self.re = re.compile(self.op.pattern)
9567
    except re.error, err:
9568
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9569
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9570

    
9571
  def Exec(self, feedback_fn):
9572
    """Returns the tag list.
9573

9574
    """
9575
    cfg = self.cfg
9576
    tgts = [("/cluster", cfg.GetClusterInfo())]
9577
    ilist = cfg.GetAllInstancesInfo().values()
9578
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9579
    nlist = cfg.GetAllNodesInfo().values()
9580
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9581
    results = []
9582
    for path, target in tgts:
9583
      for tag in target.GetTags():
9584
        if self.re.search(tag):
9585
          results.append((path, tag))
9586
    return results
9587

    
9588

    
9589
class LUAddTags(TagsLU):
9590
  """Sets a tag on a given object.
9591

9592
  """
9593
  _OP_REQP = [
9594
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9595
    ("name", _TNEString),
9596
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9597
    ]
9598
  REQ_BGL = False
9599

    
9600
  def CheckPrereq(self):
9601
    """Check prerequisites.
9602

9603
    This checks the type and length of the tag name and value.
9604

9605
    """
9606
    TagsLU.CheckPrereq(self)
9607
    for tag in self.op.tags:
9608
      objects.TaggableObject.ValidateTag(tag)
9609

    
9610
  def Exec(self, feedback_fn):
9611
    """Sets the tag.
9612

9613
    """
9614
    try:
9615
      for tag in self.op.tags:
9616
        self.target.AddTag(tag)
9617
    except errors.TagError, err:
9618
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9619
    self.cfg.Update(self.target, feedback_fn)
9620

    
9621

    
9622
class LUDelTags(TagsLU):
9623
  """Delete a list of tags from a given object.
9624

9625
  """
9626
  _OP_REQP = [
9627
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9628
    ("name", _TNEString),
9629
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9630
    ]
9631
  REQ_BGL = False
9632

    
9633
  def CheckPrereq(self):
9634
    """Check prerequisites.
9635

9636
    This checks that we have the given tag.
9637

9638
    """
9639
    TagsLU.CheckPrereq(self)
9640
    for tag in self.op.tags:
9641
      objects.TaggableObject.ValidateTag(tag)
9642
    del_tags = frozenset(self.op.tags)
9643
    cur_tags = self.target.GetTags()
9644
    if not del_tags <= cur_tags:
9645
      diff_tags = del_tags - cur_tags
9646
      diff_names = ["'%s'" % tag for tag in diff_tags]
9647
      diff_names.sort()
9648
      raise errors.OpPrereqError("Tag(s) %s not found" %
9649
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9650

    
9651
  def Exec(self, feedback_fn):
9652
    """Remove the tag from the object.
9653

9654
    """
9655
    for tag in self.op.tags:
9656
      self.target.RemoveTag(tag)
9657
    self.cfg.Update(self.target, feedback_fn)
9658

    
9659

    
9660
class LUTestDelay(NoHooksLU):
9661
  """Sleep for a specified amount of time.
9662

9663
  This LU sleeps on the master and/or nodes for a specified amount of
9664
  time.
9665

9666
  """
9667
  _OP_REQP = [
9668
    ("duration", _TFloat),
9669
    ("on_master", _TBool),
9670
    ("on_nodes", _TListOf(_TNEString)),
9671
    ]
9672
  REQ_BGL = False
9673

    
9674
  def CheckArguments(self):
9675
    # TODO: convert to the type system
9676
    self.op.repeat = getattr(self.op, "repeat", 0)
9677
    if self.op.repeat < 0:
9678
      raise errors.OpPrereqError("Repetition count cannot be negative",
                                 errors.ECODE_INVAL)
9679

    
9680
  def ExpandNames(self):
9681
    """Expand names and set required locks.
9682

9683
    This expands the node list, if any.
9684

9685
    """
9686
    self.needed_locks = {}
9687
    if self.op.on_nodes:
9688
      # _GetWantedNodes can be used here, but is not always appropriate to use
9689
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9690
      # more information.
9691
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9692
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9693

    
9694
  def _TestDelay(self):
9695
    """Do the actual sleep.
9696

9697
    """
9698
    if self.op.on_master:
9699
      if not utils.TestDelay(self.op.duration):
9700
        raise errors.OpExecError("Error during master delay test")
9701
    if self.op.on_nodes:
9702
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9703
      for node, node_result in result.items():
9704
        node_result.Raise("Failure during rpc call to node %s" % node)
9705

    
9706
  def Exec(self, feedback_fn):
9707
    """Execute the test delay opcode, with the wanted repetitions.
9708

9709
    """
9710
    if self.op.repeat == 0:
9711
      self._TestDelay()
9712
    else:
9713
      top_value = self.op.repeat - 1
9714
      for i in range(self.op.repeat):
9715
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9716
        self._TestDelay()
9717

    
9718

    
9719
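# Usage sketch for the test delay LU above (assumed opcode name and fields,
# for illustration only): an opcode along the lines of
#   opcodes.OpTestDelay(duration=5.0, on_master=True,
#                       on_nodes=["node1.example.com"], repeat=2)
# would sleep five seconds on the master and on node1, twice; a repeat count
# of 0 still performs a single delay.

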
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

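    # Worked example for the free-memory adjustment above (hypothetical
    # numbers): an instance configured with BE_MEMORY = 512 that currently
    # uses only 256 yields i_mem_diff = 256, so the node's reported
    # "memory_free" is lowered by 256 to keep room for the instance growing
    # back to its configured size; an instance that is down (missing from
    # the node's payload) reserves its full 512.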
    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

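  # Rough shape of the serialized input built above (illustrative only,
  # abbreviated, with hypothetical names):
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {"node1.example.com": {"total_memory": ..., ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": ..., ...}, ...},
  #    "request": {"type": <mode>, "required_nodes": ..., ...}}
  #
  # self.in_text is this structure dumped through the serializer.
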
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


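# Expected shape of the iallocator script output parsed above (illustrative
# only; the required keys come from _ValidateResult, the values shown are
# hypothetical):
#
#   {"success": true,
#    "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}
#
# Older scripts that return the list under a "nodes" key are still accepted
# and rewritten to "result" for backwards compatibility.

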
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = [
    ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _TNEString),
    ("nics", _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]), _TNEString)))),
    ("disks", _TOr(_TNone, _TList)),
    ]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ("nics", None),
    ("disks", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result