1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56

    
57
import ganeti.masterd.instance # pylint: disable-msg=W0611
58

    
59

    
60
# Modifiable default values; need to define these here before the
61
# actual LUs
62

    
63
def _EmptyList():
64
  """Returns an empty list.
65

66
  """
67
  return []
68

    
69

    
70
def _EmptyDict():
71
  """Returns an empty dict.
72

73
  """
74
  return {}
75

    
76

    
77
#: The without-default default value
78
_NoDefault = object()
79

    
80

    
81
#: The no-type (value too complex to check in the type system)
82
_NoType = object()
83

    
84

    
85
# Some basic types
86
def _TNotNone(val):
87
  """Checks if the given value is not None.
88

89
  """
90
  return val is not None
91

    
92

    
93
def _TNone(val):
94
  """Checks if the given value is None.
95

96
  """
97
  return val is None
98

    
99

    
100
def _TBool(val):
101
  """Checks if the given value is a boolean.
102

103
  """
104
  return isinstance(val, bool)
105

    
106

    
107
def _TInt(val):
108
  """Checks if the given value is an integer.
109

110
  """
111
  return isinstance(val, int)
112

    
113

    
114
def _TFloat(val):
115
  """Checks if the given value is a float.
116

117
  """
118
  return isinstance(val, float)
119

    
120

    
121
def _TString(val):
122
  """Checks if the given value is a string.
123

124
  """
125
  return isinstance(val, basestring)
126

    
127

    
128
def _TTrue(val):
129
  """Checks if a given value evaluates to a boolean True value.
130

131
  """
132
  return bool(val)
133

    
134

    
135
def _TElemOf(target_list):
136
  """Builds a function that checks if a given value is a member of a list.
137

138
  """
139
  return lambda val: val in target_list
140

    
141

    
142
# Container types
143
def _TList(val):
144
  """Checks if the given value is a list.
145

146
  """
147
  return isinstance(val, list)
148

    
149

    
150
def _TDict(val):
151
  """Checks if the given value is a dictionary.
152

153
  """
154
  return isinstance(val, dict)
155

    
156

    
157
# Combinator types
158
def _TAnd(*args):
159
  """Combine multiple functions using an AND operation.
160

161
  """
162
  def fn(val):
163
    return compat.all(t(val) for t in args)
164
  return fn
165

    
166

    
167
def _TOr(*args):
168
  """Combine multiple functions using an AND operation.
169

170
  """
171
  def fn(val):
172
    return compat.any(t(val) for t in args)
173
  return fn
174

    
175

    
176
# Type aliases
177

    
178
#: a non-empty string
179
_TNonEmptyString = _TAnd(_TString, _TTrue)
180

    
181

    
182
#: a maybe non-empty string
183
_TMaybeString = _TOr(_TNonEmptyString, _TNone)
184

    
185

    
186
#: a maybe boolean (bool or none)
187
_TMaybeBool = _TOr(_TBool, _TNone)
188

    
189

    
190
#: a positive integer
191
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
192

    
193
#: a strictly positive integer
194
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
195

    
196

    
197
def _TListOf(my_type):
198
  """Checks if a given value is a list with all elements of the same type.
199

200
  """
201
  return _TAnd(_TList,
202
               lambda lst: compat.all(my_type(v) for v in lst))
203

    
204

    
205
def _TDictOf(key_type, val_type):
206
  """Checks a dict type for the type of its key/values.
207

208
  """
209
  return _TAnd(_TDict,
210
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
211
                                and compat.all(val_type(v)
212
                                               for v in my_dict.values())))
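
# Illustrative sketch (not used elsewhere in this module): the small checkers
# above compose into validators for container parameters. All names referenced
# here are defined above; the function itself is a made-up example.
def _ExampleComposedChecks():
  """Returns True iff the example compositions behave as described."""
  names_ok = _TListOf(_TNonEmptyString)(["node1.example.com"])
  names_bad = _TListOf(_TNonEmptyString)([""])
  flags_ok = _TDictOf(_TNonEmptyString, _TMaybeBool)({"live": None})
  return names_ok and not names_bad and flags_ok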
213

    
214

    
215
# Common opcode attributes
216

    
217
#: output fields for a query operation
218
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
219

    
220

    
221
#: the shutdown timeout
222
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
223
                     _TPositiveInt)
224

    
225
#: the force parameter
226
_PForce = ("force", False, _TBool)
227

    
228
#: a required instance name (for single-instance LUs)
229
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
230

    
231

    
232
#: a required node name (for single-node LUs)
233
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
234

    
235
#: the migration type (live/non-live)
236
_PMigrationMode = ("mode", None, _TOr(_TNone,
237
                                      _TElemOf(constants.HT_MIGRATION_MODES)))
238

    
239
#: the obsolete 'live' mode (boolean)
240
_PMigrationLive = ("live", None, _TMaybeBool)
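
# Illustrative sketch (not an actual LU in this module): an LU combines the
# shared parameter definitions above with its own (name, default, check)
# tuples in its _OP_PARAMS attribute; "startup_paused" is a made-up parameter.
_EXAMPLE_OP_PARAMS = [
  _PInstanceName,
  _PForce,
  _PShutdownTimeout,
  ("startup_paused", False, _TBool),
  ]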
241

    
242

    
243
# End types
244
class LogicalUnit(object):
245
  """Logical Unit base class.
246

247
  Subclasses must follow these rules:
248
    - implement ExpandNames
249
    - implement CheckPrereq (except when tasklets are used)
250
    - implement Exec (except when tasklets are used)
251
    - implement BuildHooksEnv
252
    - redefine HPATH and HTYPE
253
    - optionally redefine their run requirements:
254
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
255

256
  Note that all commands require root permissions.
257

258
  @ivar dry_run_result: the value (if any) that will be returned to the caller
259
      in dry-run mode (signalled by opcode dry_run parameter)
260
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
261
      they should get if not already defined, and types they must match
262

263
  """
264
  HPATH = None
265
  HTYPE = None
266
  _OP_PARAMS = []
267
  REQ_BGL = True
268

    
269
  def __init__(self, processor, op, context, rpc):
270
    """Constructor for LogicalUnit.
271

272
    This needs to be overridden in derived classes in order to check op
273
    validity.
274

275
    """
276
    self.proc = processor
277
    self.op = op
278
    self.cfg = context.cfg
279
    self.context = context
280
    self.rpc = rpc
281
    # Dicts used to declare locking needs to mcpu
282
    self.needed_locks = None
283
    self.acquired_locks = {}
284
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
285
    self.add_locks = {}
286
    self.remove_locks = {}
287
    # Used to force good behavior when calling helper functions
288
    self.recalculate_locks = {}
289
    self.__ssh = None
290
    # logging
291
    self.Log = processor.Log # pylint: disable-msg=C0103
292
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
293
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
294
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
295
    # support for dry-run
296
    self.dry_run_result = None
297
    # support for generic debug attribute
298
    if (not hasattr(self.op, "debug_level") or
299
        not isinstance(self.op.debug_level, int)):
300
      self.op.debug_level = 0
301

    
302
    # Tasklets
303
    self.tasklets = None
304

    
305
    # The new kind-of-type-system
306
    op_id = self.op.OP_ID
307
    for attr_name, aval, test in self._OP_PARAMS:
308
      if not hasattr(op, attr_name):
309
        if aval == _NoDefault:
310
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
311
                                     (op_id, attr_name), errors.ECODE_INVAL)
312
        else:
313
          if callable(aval):
314
            dval = aval()
315
          else:
316
            dval = aval
317
          setattr(self.op, attr_name, dval)
318
      attr_val = getattr(op, attr_name)
319
      if test == _NoType:
320
        # no tests here
321
        continue
322
      if not callable(test):
323
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
324
                                     " given type is not a proper type (%s)" %
325
                                     (op_id, attr_name, test))
326
      if not test(attr_val):
327
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
328
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
329
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
330
                                   (op_id, attr_name), errors.ECODE_INVAL)
331

    
332
    self.CheckArguments()
333

    
334
  def __GetSSH(self):
335
    """Returns the SshRunner object
336

337
    """
338
    if not self.__ssh:
339
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
340
    return self.__ssh
341

    
342
  ssh = property(fget=__GetSSH)
343

    
344
  def CheckArguments(self):
345
    """Check syntactic validity for the opcode arguments.
346

347
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:
351

352
      - ExpandNames is left purely as a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)
355

356
    The function is allowed to change the self.op attribute so that
357
    later methods no longer need to worry about missing parameters.
358

359
    """
360
    pass
361

    
362
  def ExpandNames(self):
363
    """Expand names for this LU.
364

365
    This method is called before starting to execute the opcode, and it should
366
    update all the parameters of the opcode to their canonical form (e.g. a
367
    short node name must be fully expanded after this method has successfully
368
    completed). This way locking, hooks, logging, etc. can work correctly.
369

370
    LUs which implement this method must also populate the self.needed_locks
371
    member, as a dict with lock levels as keys, and a list of needed lock names
372
    as values. Rules:
373

374
      - use an empty dict if you don't need any lock
375
      - if you don't need any lock at a particular level omit that level
376
      - don't put anything for the BGL level
377
      - if you want all locks at a level use locking.ALL_SET as a value
378

379
    If you need to share locks (rather than acquire them exclusively) at one
380
    level you can modify self.share_locks, setting a true value (usually 1) for
381
    that level. By default locks are not shared.
382

383
    This function can also define a list of tasklets, which then will be
384
    executed in order instead of the usual LU-level CheckPrereq and Exec
385
    functions, if those are not defined by the LU.
386

387
    Examples::
388

389
      # Acquire all nodes and one instance
390
      self.needed_locks = {
391
        locking.LEVEL_NODE: locking.ALL_SET,
392
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
393
      }
394
      # Acquire just two nodes
395
      self.needed_locks = {
396
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
397
      }
398
      # Acquire no locks
399
      self.needed_locks = {} # No, you can't leave it to the default value None
400

401
    """
402
    # The implementation of this method is mandatory only if the new LU is
403
    # concurrent, so that old LUs don't need to be changed all at the same
404
    # time.
405
    if self.REQ_BGL:
406
      self.needed_locks = {} # Exclusive LUs don't need locks.
407
    else:
408
      raise NotImplementedError
409

    
410
  def DeclareLocks(self, level):
411
    """Declare LU locking needs for a level
412

413
    While most LUs can just declare their locking needs at ExpandNames time,
414
    sometimes there's the need to calculate some locks after having acquired
415
    the ones before. This function is called just before acquiring locks at a
416
    particular level, but after acquiring the ones at lower levels, and permits
417
    such calculations. It can be used to modify self.needed_locks, and by
418
    default it does nothing.
419

420
    This function is only called if you have something already set in
421
    self.needed_locks for the level.
422

423
    @param level: Locking level which is going to be locked
424
    @type level: member of ganeti.locking.LEVELS
425

426
    """
427

    
428
  def CheckPrereq(self):
429
    """Check prerequisites for this LU.
430

431
    This method should check that the prerequisites for the execution
432
    of this LU are fulfilled. It can do internode communication, but
433
    it should be idempotent - no cluster or system changes are
434
    allowed.
435

436
    The method should raise errors.OpPrereqError in case something is
437
    not fulfilled. Its return value is ignored.
438

439
    This method should also update all the parameters of the opcode to
440
    their canonical form if it hasn't been done by ExpandNames before.
441

442
    """
443
    if self.tasklets is not None:
444
      for (idx, tl) in enumerate(self.tasklets):
445
        logging.debug("Checking prerequisites for tasklet %s/%s",
446
                      idx + 1, len(self.tasklets))
447
        tl.CheckPrereq()
448
    else:
449
      pass
450

    
451
  def Exec(self, feedback_fn):
452
    """Execute the LU.
453

454
    This method should implement the actual work. It should raise
455
    errors.OpExecError for failures that are somewhat dealt with in
456
    code, or expected.
457

458
    """
459
    if self.tasklets is not None:
460
      for (idx, tl) in enumerate(self.tasklets):
461
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
462
        tl.Exec(feedback_fn)
463
    else:
464
      raise NotImplementedError
465

    
466
  def BuildHooksEnv(self):
467
    """Build hooks environment for this LU.
468

469
    This method should return a three-element tuple consisting of: a dict
470
    containing the environment that will be used for running the
471
    specific hook for this LU, a list of node names on which the hook
472
    should run before the execution, and a list of node names on which
473
    the hook should run after the execution.
474

475
    The keys of the dict must not be prefixed with 'GANETI_', as this will
476
    be handled in the hooks runner. Also note additional keys will be
477
    added by the hooks runner. If the LU doesn't define any
478
    environment, an empty dict (and not None) should be returned.
479

480
    If no nodes are needed, an empty list (and not None) should be returned.
481

482
    Note that if the HPATH for a LU class is None, this function will
483
    not be called.
484

485
    """
486
    raise NotImplementedError
487

    
488
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
489
    """Notify the LU about the results of its hooks.
490

491
    This method is called every time a hooks phase is executed, and notifies
492
    the Logical Unit about the hooks' result. The LU can then use it to alter
493
    its result based on the hooks.  By default the method does nothing and the
494
    previous result is passed back unchanged but any LU can define it if it
495
    wants to use the local cluster hook-scripts somehow.
496

497
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
498
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
499
    @param hook_results: the results of the multi-node hooks rpc call
500
    @param feedback_fn: function used to send feedback back to the caller
501
    @param lu_result: the previous Exec result this LU had, or None
502
        in the PRE phase
503
    @return: the new Exec result, based on the previous result
504
        and hook results
505

506
    """
507
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
509
    # pylint: disable-msg=W0613,R0201
510
    return lu_result
511

    
512
  def _ExpandAndLockInstance(self):
513
    """Helper function to expand and lock an instance.
514

515
    Many LUs that work on an instance take its name in self.op.instance_name
516
    and need to expand it and then declare the expanded name for locking. This
517
    function does it, and then updates self.op.instance_name to the expanded
518
    name. It also initializes needed_locks as a dict, if this hasn't been done
519
    before.
520

521
    """
522
    if self.needed_locks is None:
523
      self.needed_locks = {}
524
    else:
525
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
526
        "_ExpandAndLockInstance called with instance-level locks set"
527
    self.op.instance_name = _ExpandInstanceName(self.cfg,
528
                                                self.op.instance_name)
529
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
530

    
531
  def _LockInstancesNodes(self, primary_only=False):
532
    """Helper function to declare instances' nodes for locking.
533

534
    This function should be called after locking one or more instances to lock
535
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
536
    with all primary or secondary nodes for instances already locked and
537
    present in self.needed_locks[locking.LEVEL_INSTANCE].
538

539
    It should be called from DeclareLocks, and for safety only works if
540
    self.recalculate_locks[locking.LEVEL_NODE] is set.
541

542
    In the future it may grow parameters to just lock some instance's nodes, or
543
    to just lock primary or secondary nodes, if needed.
544

545
    It should be called in DeclareLocks in a way similar to::
546

547
      if level == locking.LEVEL_NODE:
548
        self._LockInstancesNodes()
549

550
    @type primary_only: boolean
551
    @param primary_only: only lock primary nodes of locked instances
552

553
    """
554
    assert locking.LEVEL_NODE in self.recalculate_locks, \
555
      "_LockInstancesNodes helper function called with no nodes to recalculate"
556

    
557
    # TODO: check if we've really been called with the instance locks held
558

    
559
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
560
    # future we might want to have different behaviors depending on the value
561
    # of self.recalculate_locks[locking.LEVEL_NODE]
562
    wanted_nodes = []
563
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
564
      instance = self.context.cfg.GetInstanceInfo(instance_name)
565
      wanted_nodes.append(instance.primary_node)
566
      if not primary_only:
567
        wanted_nodes.extend(instance.secondary_nodes)
568

    
569
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
570
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
571
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
572
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
573

    
574
    del self.recalculate_locks[locking.LEVEL_NODE]
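
  # Illustrative sketch (no specific LU shown; names follow this module's
  # conventions) of the usual two-step pattern driving the helper above:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()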
575

    
576

    
577
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
578
  """Simple LU which runs no hooks.
579

580
  This LU is intended as a parent for other LogicalUnits which will
581
  run no hooks, in order to reduce duplicate code.
582

583
  """
584
  HPATH = None
585
  HTYPE = None
586

    
587
  def BuildHooksEnv(self):
588
    """Empty BuildHooksEnv for NoHooksLu.
589

590
    This just raises an error.
591

592
    """
593
    assert False, "BuildHooksEnv called for NoHooksLUs"
594

    
595

    
596
class Tasklet:
597
  """Tasklet base class.
598

599
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
600
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
601
  tasklets know nothing about locks.
602

603
  Subclasses must follow these rules:
604
    - Implement CheckPrereq
605
    - Implement Exec
606

607
  """
608
  def __init__(self, lu):
609
    self.lu = lu
610

    
611
    # Shortcuts
612
    self.cfg = lu.cfg
613
    self.rpc = lu.rpc
614

    
615
  def CheckPrereq(self):
616
    """Check prerequisites for this tasklets.
617

618
    This method should check whether the prerequisites for the execution of
619
    this tasklet are fulfilled. It can do internode communication, but it
620
    should be idempotent - no cluster or system changes are allowed.
621

622
    The method should raise errors.OpPrereqError in case something is not
623
    fulfilled. Its return value is ignored.
624

625
    This method should also update all parameters to their canonical form if it
626
    hasn't been done before.
627

628
    """
629
    pass
630

    
631
  def Exec(self, feedback_fn):
632
    """Execute the tasklet.
633

634
    This method should implement the actual work. It should raise
635
    errors.OpExecError for failures that are somewhat dealt with in code, or
636
    expected.
637

638
    """
639
    raise NotImplementedError
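
  # Illustrative sketch (no such tasklet exists in this module): a minimal
  # subclass, and the line an LU would add in ExpandNames to schedule it:
  #
  #   class _ExampleTasklet(Tasklet):
  #     def CheckPrereq(self):
  #       pass
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("example tasklet ran")
  #
  #   self.tasklets = [_ExampleTasklet(self)]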
640

    
641

    
642
def _GetWantedNodes(lu, nodes):
643
  """Returns list of checked and expanded node names.
644

645
  @type lu: L{LogicalUnit}
646
  @param lu: the logical unit on whose behalf we execute
647
  @type nodes: list
648
  @param nodes: list of node names or None for all nodes
649
  @rtype: list
650
  @return: the list of nodes, sorted
651
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
652

653
  """
654
  if not nodes:
655
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
656
      " non-empty list of nodes whose name is to be expanded.")
657

    
658
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
659
  return utils.NiceSort(wanted)
660

    
661

    
662
def _GetWantedInstances(lu, instances):
663
  """Returns list of checked and expanded instance names.
664

665
  @type lu: L{LogicalUnit}
666
  @param lu: the logical unit on whose behalf we execute
667
  @type instances: list
668
  @param instances: list of instance names or None for all instances
669
  @rtype: list
670
  @return: the list of instances, sorted
671
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
672
  @raise errors.OpPrereqError: if any of the passed instances is not found
673

674
  """
675
  if instances:
676
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
677
  else:
678
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
679
  return wanted
680

    
681

    
682
def _GetUpdatedParams(old_params, update_dict,
683
                      use_default=True, use_none=False):
684
  """Return the new version of a parameter dictionary.
685

686
  @type old_params: dict
687
  @param old_params: old parameters
688
  @type update_dict: dict
689
  @param update_dict: dict containing new parameter values, or
690
      constants.VALUE_DEFAULT to reset the parameter to its default
691
      value
692
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
698
  @rtype: dict
699
  @return: the new parameter dictionary
700

701
  """
702
  params_copy = copy.deepcopy(old_params)
703
  for key, val in update_dict.iteritems():
704
    if ((use_default and val == constants.VALUE_DEFAULT) or
705
        (use_none and val is None)):
706
      try:
707
        del params_copy[key]
708
      except KeyError:
709
        pass
710
    else:
711
      params_copy[key] = val
712
  return params_copy
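
# Example of the semantics above (illustrative values only):
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3}: "a" is removed so it reverts to its default,
# "c" is added and "b" is left untouched.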
713

    
714

    
715
def _CheckOutputFields(static, dynamic, selected):
716
  """Checks whether all selected fields are valid.
717

718
  @type static: L{utils.FieldSet}
719
  @param static: static fields set
720
  @type dynamic: L{utils.FieldSet}
721
  @param dynamic: dynamic fields set
722

723
  """
724
  f = utils.FieldSet()
725
  f.Extend(static)
726
  f.Extend(dynamic)
727

    
728
  delta = f.NonMatching(selected)
729
  if delta:
730
    raise errors.OpPrereqError("Unknown output fields selected: %s"
731
                               % ",".join(delta), errors.ECODE_INVAL)
732

    
733

    
734
def _CheckGlobalHvParams(params):
735
  """Validates that given hypervisor params are not global ones.
736

737
  This will ensure that instances don't get customised versions of
738
  global params.
739

740
  """
741
  used_globals = constants.HVC_GLOBALS.intersection(params)
742
  if used_globals:
743
    msg = ("The following hypervisor parameters are global and cannot"
744
           " be customized at instance level, please modify them at"
745
           " cluster level: %s" % utils.CommaJoin(used_globals))
746
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
747

    
748

    
749
def _CheckNodeOnline(lu, node):
750
  """Ensure that a given node is online.
751

752
  @param lu: the LU on behalf of which we make the check
753
  @param node: the node to check
754
  @raise errors.OpPrereqError: if the node is offline
755

756
  """
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("Can't use offline node %s" % node,
759
                               errors.ECODE_INVAL)
760

    
761

    
762
def _CheckNodeNotDrained(lu, node):
763
  """Ensure that a given node is not drained.
764

765
  @param lu: the LU on behalf of which we make the check
766
  @param node: the node to check
767
  @raise errors.OpPrereqError: if the node is drained
768

769
  """
770
  if lu.cfg.GetNodeInfo(node).drained:
771
    raise errors.OpPrereqError("Can't use drained node %s" % node,
772
                               errors.ECODE_INVAL)
773

    
774

    
775
def _CheckNodeHasOS(lu, node, os_name, force_variant):
776
  """Ensure that a node supports a given OS.
777

778
  @param lu: the LU on behalf of which we make the check
779
  @param node: the node to check
780
  @param os_name: the OS to query about
781
  @param force_variant: whether to ignore variant errors
782
  @raise errors.OpPrereqError: if the node does not support the OS
783

784
  """
785
  result = lu.rpc.call_os_get(node, os_name)
786
  result.Raise("OS '%s' not in supported OS list for node %s" %
787
               (os_name, node),
788
               prereq=True, ecode=errors.ECODE_INVAL)
789
  if not force_variant:
790
    _CheckOSVariant(result.payload, os_name)
791

    
792

    
793
def _RequireFileStorage():
794
  """Checks that file storage is enabled.
795

796
  @raise errors.OpPrereqError: when file storage is disabled
797

798
  """
799
  if not constants.ENABLE_FILE_STORAGE:
800
    raise errors.OpPrereqError("File storage disabled at configure time",
801
                               errors.ECODE_INVAL)
802

    
803

    
804
def _CheckDiskTemplate(template):
805
  """Ensure a given disk template is valid.
806

807
  """
808
  if template not in constants.DISK_TEMPLATES:
809
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
810
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
811
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
812
  if template == constants.DT_FILE:
813
    _RequireFileStorage()
814
  return True
815

    
816

    
817
def _CheckStorageType(storage_type):
818
  """Ensure a given storage type is valid.
819

820
  """
821
  if storage_type not in constants.VALID_STORAGE_TYPES:
822
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
823
                               errors.ECODE_INVAL)
824
  if storage_type == constants.ST_FILE:
825
    _RequireFileStorage()
826
  return True
827

    
828

    
829
def _GetClusterDomainSecret():
830
  """Reads the cluster domain secret.
831

832
  """
833
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
834
                               strict=True)
835

    
836

    
837
def _CheckInstanceDown(lu, instance, reason):
838
  """Ensure that an instance is not running."""
839
  if instance.admin_up:
840
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
841
                               (instance.name, reason), errors.ECODE_STATE)
842

    
843
  pnode = instance.primary_node
844
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
845
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
846
              prereq=True, ecode=errors.ECODE_ENVIRON)
847

    
848
  if instance.name in ins_l.payload:
849
    raise errors.OpPrereqError("Instance %s is running, %s" %
850
                               (instance.name, reason), errors.ECODE_STATE)
851

    
852

    
853
def _ExpandItemName(fn, name, kind):
854
  """Expand an item name.
855

856
  @param fn: the function to use for expansion
857
  @param name: requested item name
858
  @param kind: text description ('Node' or 'Instance')
859
  @return: the resolved (full) name
860
  @raise errors.OpPrereqError: if the item is not found
861

862
  """
863
  full_name = fn(name)
864
  if full_name is None:
865
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
866
                               errors.ECODE_NOENT)
867
  return full_name
868

    
869

    
870
def _ExpandNodeName(cfg, name):
871
  """Wrapper over L{_ExpandItemName} for nodes."""
872
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
873

    
874

    
875
def _ExpandInstanceName(cfg, name):
876
  """Wrapper over L{_ExpandItemName} for instance."""
877
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
878

    
879

    
880
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
881
                          memory, vcpus, nics, disk_template, disks,
882
                          bep, hvp, hypervisor_name):
883
  """Builds instance related env variables for hooks
884

885
  This builds the hook environment from individual variables.
886

887
  @type name: string
888
  @param name: the name of the instance
889
  @type primary_node: string
890
  @param primary_node: the name of the instance's primary node
891
  @type secondary_nodes: list
892
  @param secondary_nodes: list of secondary nodes as strings
893
  @type os_type: string
894
  @param os_type: the name of the instance's OS
895
  @type status: boolean
896
  @param status: the should_run status of the instance
897
  @type memory: string
898
  @param memory: the memory size of the instance
899
  @type vcpus: string
900
  @param vcpus: the count of VCPUs the instance has
901
  @type nics: list
902
  @param nics: list of tuples (ip, mac, mode, link) representing
903
      the NICs the instance has
904
  @type disk_template: string
905
  @param disk_template: the disk template of the instance
906
  @type disks: list
907
  @param disks: the list of (size, mode) pairs
908
  @type bep: dict
909
  @param bep: the backend parameters for the instance
910
  @type hvp: dict
911
  @param hvp: the hypervisor parameters for the instance
912
  @type hypervisor_name: string
913
  @param hypervisor_name: the hypervisor for the instance
914
  @rtype: dict
915
  @return: the hook environment for this instance
916

917
  """
918
  if status:
919
    str_status = "up"
920
  else:
921
    str_status = "down"
922
  env = {
923
    "OP_TARGET": name,
924
    "INSTANCE_NAME": name,
925
    "INSTANCE_PRIMARY": primary_node,
926
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
927
    "INSTANCE_OS_TYPE": os_type,
928
    "INSTANCE_STATUS": str_status,
929
    "INSTANCE_MEMORY": memory,
930
    "INSTANCE_VCPUS": vcpus,
931
    "INSTANCE_DISK_TEMPLATE": disk_template,
932
    "INSTANCE_HYPERVISOR": hypervisor_name,
933
  }
934

    
935
  if nics:
936
    nic_count = len(nics)
937
    for idx, (ip, mac, mode, link) in enumerate(nics):
938
      if ip is None:
939
        ip = ""
940
      env["INSTANCE_NIC%d_IP" % idx] = ip
941
      env["INSTANCE_NIC%d_MAC" % idx] = mac
942
      env["INSTANCE_NIC%d_MODE" % idx] = mode
943
      env["INSTANCE_NIC%d_LINK" % idx] = link
944
      if mode == constants.NIC_MODE_BRIDGED:
945
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
946
  else:
947
    nic_count = 0
948

    
949
  env["INSTANCE_NIC_COUNT"] = nic_count
950

    
951
  if disks:
952
    disk_count = len(disks)
953
    for idx, (size, mode) in enumerate(disks):
954
      env["INSTANCE_DISK%d_SIZE" % idx] = size
955
      env["INSTANCE_DISK%d_MODE" % idx] = mode
956
  else:
957
    disk_count = 0
958

    
959
  env["INSTANCE_DISK_COUNT"] = disk_count
960

    
961
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
962
    for key, value in source.items():
963
      env["INSTANCE_%s_%s" % (kind, key)] = value
964

    
965
  return env
966

    
967

    
968
def _NICListToTuple(lu, nics):
969
  """Build a list of nic information tuples.
970

971
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
972
  value in LUQueryInstanceData.
973

974
  @type lu:  L{LogicalUnit}
975
  @param lu: the logical unit on whose behalf we execute
976
  @type nics: list of L{objects.NIC}
977
  @param nics: list of nics to convert to hooks tuples
978

979
  """
980
  hooks_nics = []
981
  cluster = lu.cfg.GetClusterInfo()
982
  for nic in nics:
983
    ip = nic.ip
984
    mac = nic.mac
985
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
986
    mode = filled_params[constants.NIC_MODE]
987
    link = filled_params[constants.NIC_LINK]
988
    hooks_nics.append((ip, mac, mode, link))
989
  return hooks_nics
990

    
991

    
992
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
993
  """Builds instance related env variables for hooks from an object.
994

995
  @type lu: L{LogicalUnit}
996
  @param lu: the logical unit on whose behalf we execute
997
  @type instance: L{objects.Instance}
998
  @param instance: the instance for which we should build the
999
      environment
1000
  @type override: dict
1001
  @param override: dictionary with key/values that will override
1002
      our values
1003
  @rtype: dict
1004
  @return: the hook environment dictionary
1005

1006
  """
1007
  cluster = lu.cfg.GetClusterInfo()
1008
  bep = cluster.FillBE(instance)
1009
  hvp = cluster.FillHV(instance)
1010
  args = {
1011
    'name': instance.name,
1012
    'primary_node': instance.primary_node,
1013
    'secondary_nodes': instance.secondary_nodes,
1014
    'os_type': instance.os,
1015
    'status': instance.admin_up,
1016
    'memory': bep[constants.BE_MEMORY],
1017
    'vcpus': bep[constants.BE_VCPUS],
1018
    'nics': _NICListToTuple(lu, instance.nics),
1019
    'disk_template': instance.disk_template,
1020
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1021
    'bep': bep,
1022
    'hvp': hvp,
1023
    'hypervisor_name': instance.hypervisor,
1024
  }
1025
  if override:
1026
    args.update(override)
1027
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1028

    
1029

    
1030
def _AdjustCandidatePool(lu, exceptions):
1031
  """Adjust the candidate pool after node operations.
1032

1033
  """
1034
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1035
  if mod_list:
1036
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1037
               utils.CommaJoin(node.name for node in mod_list))
1038
    for name in mod_list:
1039
      lu.context.ReaddNode(name)
1040
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1041
  if mc_now > mc_max:
1042
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1043
               (mc_now, mc_max))
1044

    
1045

    
1046
def _DecideSelfPromotion(lu, exceptions=None):
1047
  """Decide whether I should promote myself as a master candidate.
1048

1049
  """
1050
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1051
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052
  # the new node will increase mc_max by one, so:
1053
  mc_should = min(mc_should + 1, cp_size)
1054
  return mc_now < mc_should
1055

    
1056

    
1057
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1058
  """Check that the brigdes needed by a list of nics exist.
1059

1060
  """
1061
  cluster = lu.cfg.GetClusterInfo()
1062
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1063
  brlist = [params[constants.NIC_LINK] for params in paramslist
1064
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1065
  if brlist:
1066
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1067
    result.Raise("Error checking bridges on destination node '%s'" %
1068
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1069

    
1070

    
1071
def _CheckInstanceBridgesExist(lu, instance, node=None):
1072
  """Check that the brigdes needed by an instance exist.
1073

1074
  """
1075
  if node is None:
1076
    node = instance.primary_node
1077
  _CheckNicsBridgesExist(lu, instance.nics, node)
1078

    
1079

    
1080
def _CheckOSVariant(os_obj, name):
1081
  """Check whether an OS name conforms to the os variants specification.
1082

1083
  @type os_obj: L{objects.OS}
1084
  @param os_obj: OS object to check
1085
  @type name: string
1086
  @param name: OS name passed by the user, to check for validity
1087

1088
  """
1089
  if not os_obj.supported_variants:
1090
    return
1091
  variant = objects.OS.GetVariant(name)
1092
  if not variant:
1093
    raise errors.OpPrereqError("OS name must include a variant",
1094
                               errors.ECODE_INVAL)
1095

    
1096
  if variant not in os_obj.supported_variants:
1097
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1098

    
1099

    
1100
def _GetNodeInstancesInner(cfg, fn):
1101
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1102

    
1103

    
1104
def _GetNodeInstances(cfg, node_name):
1105
  """Returns a list of all primary and secondary instances on a node.
1106

1107
  """
1108

    
1109
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1110

    
1111

    
1112
def _GetNodePrimaryInstances(cfg, node_name):
1113
  """Returns primary instances on a node.
1114

1115
  """
1116
  return _GetNodeInstancesInner(cfg,
1117
                                lambda inst: node_name == inst.primary_node)
1118

    
1119

    
1120
def _GetNodeSecondaryInstances(cfg, node_name):
1121
  """Returns secondary instances on a node.
1122

1123
  """
1124
  return _GetNodeInstancesInner(cfg,
1125
                                lambda inst: node_name in inst.secondary_nodes)
1126

    
1127

    
1128
def _GetStorageTypeArgs(cfg, storage_type):
1129
  """Returns the arguments for a storage type.
1130

1131
  """
1132
  # Special case for file storage
1133
  if storage_type == constants.ST_FILE:
1134
    # storage.FileStorage wants a list of storage directories
1135
    return [[cfg.GetFileStorageDir()]]
1136

    
1137
  return []
1138

    
1139

    
1140
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1141
  faulty = []
1142

    
1143
  for dev in instance.disks:
1144
    cfg.SetDiskID(dev, node_name)
1145

    
1146
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1147
  result.Raise("Failed to get disk status from node %s" % node_name,
1148
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1149

    
1150
  for idx, bdev_status in enumerate(result.payload):
1151
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1152
      faulty.append(idx)
1153

    
1154
  return faulty
1155

    
1156

    
1157
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1158
  """Check the sanity of iallocator and node arguments and use the
1159
  cluster-wide iallocator if appropriate.
1160

1161
  Check that at most one of (iallocator, node) is specified. If none is
1162
  specified, then the LU's opcode's iallocator slot is filled with the
1163
  cluster-wide default iallocator.
1164

1165
  @type iallocator_slot: string
1166
  @param iallocator_slot: the name of the opcode iallocator slot
1167
  @type node_slot: string
1168
  @param node_slot: the name of the opcode target node slot
1169

1170
  """
1171
  node = getattr(lu.op, node_slot, None)
1172
  iallocator = getattr(lu.op, iallocator_slot, None)
1173

    
1174
  if node is not None and iallocator is not None:
1175
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1176
                               errors.ECODE_INVAL)
1177
  elif node is None and iallocator is None:
1178
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1179
    if default_iallocator:
1180
      setattr(lu.op, iallocator_slot, default_iallocator)
1181
    else:
1182
      raise errors.OpPrereqError("No iallocator or node given and no"
1183
                                 " cluster-wide default iallocator found."
1184
                                 " Please specify either an iallocator or a"
1185
                                 " node, or set a cluster-wide default"
1186
                                 " iallocator.")
1187

    
1188

    
1189
class LUPostInitCluster(LogicalUnit):
1190
  """Logical unit for running hooks after cluster initialization.
1191

1192
  """
1193
  HPATH = "cluster-init"
1194
  HTYPE = constants.HTYPE_CLUSTER
1195

    
1196
  def BuildHooksEnv(self):
1197
    """Build hooks env.
1198

1199
    """
1200
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1201
    mn = self.cfg.GetMasterNode()
1202
    return env, [], [mn]
1203

    
1204
  def Exec(self, feedback_fn):
1205
    """Nothing to do.
1206

1207
    """
1208
    return True
1209

    
1210

    
1211
class LUDestroyCluster(LogicalUnit):
1212
  """Logical unit for destroying the cluster.
1213

1214
  """
1215
  HPATH = "cluster-destroy"
1216
  HTYPE = constants.HTYPE_CLUSTER
1217

    
1218
  def BuildHooksEnv(self):
1219
    """Build hooks env.
1220

1221
    """
1222
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1223
    return env, [], []
1224

    
1225
  def CheckPrereq(self):
1226
    """Check prerequisites.
1227

1228
    This checks whether the cluster is empty.
1229

1230
    Any errors are signaled by raising errors.OpPrereqError.
1231

1232
    """
1233
    master = self.cfg.GetMasterNode()
1234

    
1235
    nodelist = self.cfg.GetNodeList()
1236
    if len(nodelist) != 1 or nodelist[0] != master:
1237
      raise errors.OpPrereqError("There are still %d node(s) in"
1238
                                 " this cluster." % (len(nodelist) - 1),
1239
                                 errors.ECODE_INVAL)
1240
    instancelist = self.cfg.GetInstanceList()
1241
    if instancelist:
1242
      raise errors.OpPrereqError("There are still %d instance(s) in"
1243
                                 " this cluster." % len(instancelist),
1244
                                 errors.ECODE_INVAL)
1245

    
1246
  def Exec(self, feedback_fn):
1247
    """Destroys the cluster.
1248

1249
    """
1250
    master = self.cfg.GetMasterNode()
1251
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1252

    
1253
    # Run post hooks on master node before it's removed
1254
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1255
    try:
1256
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1257
    except:
1258
      # pylint: disable-msg=W0702
1259
      self.LogWarning("Errors occurred running hooks on %s" % master)
1260

    
1261
    result = self.rpc.call_node_stop_master(master, False)
1262
    result.Raise("Could not disable the master role")
1263

    
1264
    if modify_ssh_setup:
1265
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1266
      utils.CreateBackup(priv_key)
1267
      utils.CreateBackup(pub_key)
1268

    
1269
    return master
1270

    
1271

    
1272
def _VerifyCertificate(filename):
1273
  """Verifies a certificate for LUVerifyCluster.
1274

1275
  @type filename: string
1276
  @param filename: Path to PEM file
1277

1278
  """
1279
  try:
1280
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1281
                                           utils.ReadFile(filename))
1282
  except Exception, err: # pylint: disable-msg=W0703
1283
    return (LUVerifyCluster.ETYPE_ERROR,
1284
            "Failed to load X509 certificate %s: %s" % (filename, err))
1285

    
1286
  (errcode, msg) = \
1287
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1288
                                constants.SSL_CERT_EXPIRATION_ERROR)
1289

    
1290
  if msg:
1291
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1292
  else:
1293
    fnamemsg = None
1294

    
1295
  if errcode is None:
1296
    return (None, fnamemsg)
1297
  elif errcode == utils.CERT_WARNING:
1298
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1299
  elif errcode == utils.CERT_ERROR:
1300
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1301

    
1302
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1303

    
1304

    
1305
class LUVerifyCluster(LogicalUnit):
1306
  """Verifies the cluster status.
1307

1308
  """
1309
  HPATH = "cluster-verify"
1310
  HTYPE = constants.HTYPE_CLUSTER
1311
  _OP_PARAMS = [
1312
    ("skip_checks", _EmptyList,
1313
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1314
    ("verbose", False, _TBool),
1315
    ("error_codes", False, _TBool),
1316
    ("debug_simulate_errors", False, _TBool),
1317
    ]
1318
  REQ_BGL = False
1319

    
1320
  TCLUSTER = "cluster"
1321
  TNODE = "node"
1322
  TINSTANCE = "instance"
1323

    
1324
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1325
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1326
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1327
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1328
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1329
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1331
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1332
  ENODEDRBD = (TNODE, "ENODEDRBD")
1333
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1334
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1335
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1336
  ENODEHV = (TNODE, "ENODEHV")
1337
  ENODELVM = (TNODE, "ENODELVM")
1338
  ENODEN1 = (TNODE, "ENODEN1")
1339
  ENODENET = (TNODE, "ENODENET")
1340
  ENODEOS = (TNODE, "ENODEOS")
1341
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1342
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1343
  ENODERPC = (TNODE, "ENODERPC")
1344
  ENODESSH = (TNODE, "ENODESSH")
1345
  ENODEVERSION = (TNODE, "ENODEVERSION")
1346
  ENODESETUP = (TNODE, "ENODESETUP")
1347
  ENODETIME = (TNODE, "ENODETIME")
1348

    
1349
  ETYPE_FIELD = "code"
1350
  ETYPE_ERROR = "ERROR"
1351
  ETYPE_WARNING = "WARNING"
1352

    
1353
  class NodeImage(object):
1354
    """A class representing the logical and physical status of a node.
1355

1356
    @type name: string
1357
    @ivar name: the node name to which this object refers
1358
    @ivar volumes: a structure as returned from
1359
        L{ganeti.backend.GetVolumeList} (runtime)
1360
    @ivar instances: a list of running instances (runtime)
1361
    @ivar pinst: list of configured primary instances (config)
1362
    @ivar sinst: list of configured secondary instances (config)
1363
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1364
        of this node (config)
1365
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1366
    @ivar dfree: free disk, as reported by the node (runtime)
1367
    @ivar offline: the offline status (config)
1368
    @type rpc_fail: boolean
1369
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1370
        not whether the individual keys were correct) (runtime)
1371
    @type lvm_fail: boolean
1372
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1373
    @type hyp_fail: boolean
1374
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1375
    @type ghost: boolean
1376
    @ivar ghost: whether this is a known node or not (config)
1377
    @type os_fail: boolean
1378
    @ivar os_fail: whether the RPC call didn't return valid OS data
1379
    @type oslist: list
1380
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1381

1382
    """
1383
    def __init__(self, offline=False, name=None):
1384
      self.name = name
1385
      self.volumes = {}
1386
      self.instances = []
1387
      self.pinst = []
1388
      self.sinst = []
1389
      self.sbp = {}
1390
      self.mfree = 0
1391
      self.dfree = 0
1392
      self.offline = offline
1393
      self.rpc_fail = False
1394
      self.lvm_fail = False
1395
      self.hyp_fail = False
1396
      self.ghost = False
1397
      self.os_fail = False
1398
      self.oslist = {}
1399

    
1400
  def ExpandNames(self):
1401
    self.needed_locks = {
1402
      locking.LEVEL_NODE: locking.ALL_SET,
1403
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1404
    }
1405
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1406

    
1407
  def _Error(self, ecode, item, msg, *args, **kwargs):
1408
    """Format an error message.
1409

1410
    Based on the opcode's error_codes parameter, either format a
1411
    parseable error code, or a simpler error string.
1412

1413
    This must be called only from Exec and functions called from Exec.
1414

1415
    """
1416
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1417
    itype, etxt = ecode
1418
    # first complete the msg
1419
    if args:
1420
      msg = msg % args
1421
    # then format the whole message
1422
    if self.op.error_codes:
1423
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1424
    else:
1425
      if item:
1426
        item = " " + item
1427
      else:
1428
        item = ""
1429
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1430
    # and finally report it via the feedback_fn
1431
    self._feedback_fn("  - %s" % msg)
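
    # Illustrative output for the two formats (made-up node and message):
    #   with op.error_codes:    "  - ERROR:ENODENET:node:node1.example.com:msg"
    #   without op.error_codes: "  - ERROR: node node1.example.com: msg"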
1432

    
1433
  def _ErrorIf(self, cond, *args, **kwargs):
1434
    """Log an error message if the passed condition is True.
1435

1436
    """
1437
    cond = bool(cond) or self.op.debug_simulate_errors
1438
    if cond:
1439
      self._Error(*args, **kwargs)
1440
    # do not mark the operation as failed for WARN cases only
1441
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1442
      self.bad = self.bad or cond
1443

    
1444
  def _VerifyNode(self, ninfo, nresult):
1445
    """Perform some basic validation on data returned from a node.
1446

1447
      - check the result data structure is well formed and has all the
1448
        mandatory fields
1449
      - check ganeti version
1450

1451
    @type ninfo: L{objects.Node}
1452
    @param ninfo: the node to check
1453
    @param nresult: the results from the node
1454
    @rtype: boolean
1455
    @return: whether overall this call was successful (and we can expect
1456
         reasonable values in the response)
1457

1458
    """
1459
    node = ninfo.name
1460
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1461

    
1462
    # main result, nresult should be a non-empty dict
1463
    test = not nresult or not isinstance(nresult, dict)
1464
    _ErrorIf(test, self.ENODERPC, node,
1465
                  "unable to verify node: no data returned")
1466
    if test:
1467
      return False
1468

    
1469
    # compares ganeti version
1470
    local_version = constants.PROTOCOL_VERSION
1471
    remote_version = nresult.get("version", None)
1472
    test = not (remote_version and
1473
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)


  def _VerifyInstance(self, instance, instanceconfig, node_image):
1624
    """Verify an instance.
1625

1626
    This function checks to see if the required block devices are
1627
    available on the instance's node.
1628

1629
    """
1630
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1631
    node_current = instanceconfig.primary_node
1632

    
1633
    node_vol_should = {}
1634
    instanceconfig.MapLVsByNode(node_vol_should)
1635

    
1636
    for node in node_vol_should:
1637
      n_img = node_image[node]
1638
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1639
        # ignore missing volumes on offline or broken nodes
1640
        continue
1641
      for volume in node_vol_should[node]:
1642
        test = volume not in n_img.volumes
1643
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1644
                 "volume %s missing on node %s", volume, node)
1645

    
1646
    if instanceconfig.admin_up:
1647
      pri_img = node_image[node_current]
1648
      test = instance not in pri_img.instances and not pri_img.offline
1649
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1650
               "instance not running on its primary node %s",
1651
               node_current)
1652

    
1653
    for node, n_img in node_image.items():
1654
      if node != node_current:
1655
        test = instance in n_img.instances
1656
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1657
                 "instance should not run on node %s", node)
1658

    
1659
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1660
    """Verify if there are any unknown volumes in the cluster.
1661

1662
    The .os, .swap and backup volumes are ignored. All other volumes are
1663
    reported as unknown.
1664

1665
    @type reserved: L{ganeti.utils.FieldSet}
1666
    @param reserved: a FieldSet of reserved volume names
1667

1668
    """
1669
    for node, n_img in node_image.items():
1670
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1671
        # skip non-healthy nodes
1672
        continue
1673
      for volume in n_img.volumes:
1674
        test = ((node not in node_vol_should or
1675
                volume not in node_vol_should[node]) and
1676
                not reserved.Matches(volume))
1677
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1678
                      "volume %s is unknown", volume)
1679

    
1680
  def _VerifyOrphanInstances(self, instancelist, node_image):
1681
    """Verify the list of running instances.
1682

1683
    This checks what instances are running but unknown to the cluster.
1684

1685
    """
1686
    for node, n_img in node_image.items():
1687
      for o_inst in n_img.instances:
1688
        test = o_inst not in instancelist
1689
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1690
                      "instance %s on node %s should not exist", o_inst, node)
1691

    
1692
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1693
    """Verify N+1 Memory Resilience.
1694

1695
    Check that if one single node dies we can still start all the
1696
    instances it was primary for.
1697

1698
    """
1699
    for node, n_img in node_image.items():
1700
      # This code checks that every node which is now listed as
1701
      # secondary has enough memory to host all instances it is
1702
      # supposed to should a single other node in the cluster fail.
1703
      # FIXME: not ready for failover to an arbitrary node
1704
      # FIXME: does not support file-backed instances
1705
      # WARNING: we currently take into account down instances as well
1706
      # as up ones, considering that even if they're down someone
1707
      # might want to start them even in the event of a node failure.
1708
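      # n_img.sbp maps each primary node to the list of instances that have
      # the current node (n_img) as one of their secondaries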
      for prinode, instances in n_img.sbp.items():
1709
        needed_mem = 0
1710
        for instance in instances:
1711
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1712
          if bep[constants.BE_AUTO_BALANCE]:
1713
            needed_mem += bep[constants.BE_MEMORY]
1714
        test = n_img.mfree < needed_mem
1715
        self._ErrorIf(test, self.ENODEN1, node,
1716
                      "not enough memory on to accommodate"
1717
                      " failovers should peer node %s fail", prinode)
1718

    
1719
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1720
                       master_files):
1721
    """Verifies and computes the node required file checksums.
1722

1723
    @type ninfo: L{objects.Node}
1724
    @param ninfo: the node to check
1725
    @param nresult: the remote results for the node
1726
    @param file_list: required list of files
1727
    @param local_cksum: dictionary of local files and their checksums
1728
    @param master_files: list of files that only masters should have
1729

1730
    """
1731
    node = ninfo.name
1732
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1733

    
1734
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1735
    test = not isinstance(remote_cksum, dict)
1736
    _ErrorIf(test, self.ENODEFILECHECK, node,
1737
             "node hasn't returned file checksum data")
1738
    if test:
1739
      return
1740

    
1741
    for file_name in file_list:
1742
      node_is_mc = ninfo.master_candidate
1743
      must_have = (file_name not in master_files) or node_is_mc
1744
      # missing
1745
      test1 = file_name not in remote_cksum
1746
      # invalid checksum
1747
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1748
      # existing and good
1749
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1750
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1751
               "file '%s' missing", file_name)
1752
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1753
               "file '%s' has wrong checksum", file_name)
1754
      # not candidate and this is not a must-have file
1755
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1756
               "file '%s' should not exist on non master"
1757
               " candidates (and the file is outdated)", file_name)
1758
      # all good, except non-master/non-must have combination
1759
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1760
               "file '%s' should not exist"
1761
               " on non master candidates", file_name)
1762

    
1763
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1764
                      drbd_map):
1765
    """Verifies and the node DRBD status.
1766

1767
    @type ninfo: L{objects.Node}
1768
    @param ninfo: the node to check
1769
    @param nresult: the remote results for the node
1770
    @param instanceinfo: the dict of instances
1771
    @param drbd_helper: the configured DRBD usermode helper
1772
    @param drbd_map: the DRBD map as returned by
1773
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1774

1775
    """
1776
    node = ninfo.name
1777
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1778

    
1779
    if drbd_helper:
1780
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1781
      test = (helper_result is None)
1782
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1783
               "no drbd usermode helper returned")
1784
      if helper_result:
1785
        status, payload = helper_result
1786
        test = not status
1787
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1788
                 "drbd usermode helper check unsuccessful: %s", payload)
1789
        test = status and (payload != drbd_helper)
1790
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1791
                 "wrong drbd usermode helper: %s", payload)
1792

    
1793
    # compute the DRBD minors
1794
    node_drbd = {}
1795
    for minor, instance in drbd_map[node].items():
1796
      test = instance not in instanceinfo
1797
      _ErrorIf(test, self.ECLUSTERCFG, None,
1798
               "ghost instance '%s' in temporary DRBD map", instance)
1799
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
1802
      if test:
1803
        node_drbd[minor] = (instance, False)
1804
      else:
1805
        instance = instanceinfo[instance]
1806
        node_drbd[minor] = (instance.name, instance.admin_up)
1807

    
1808
    # and now check them
1809
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1810
    test = not isinstance(used_minors, (tuple, list))
1811
    _ErrorIf(test, self.ENODEDRBD, node,
1812
             "cannot parse drbd status file: %s", str(used_minors))
1813
    if test:
1814
      # we cannot check drbd status
1815
      return
1816

    
1817
    for minor, (iname, must_exist) in node_drbd.items():
1818
      test = minor not in used_minors and must_exist
1819
      _ErrorIf(test, self.ENODEDRBD, node,
1820
               "drbd minor %d of instance %s is not active", minor, iname)
1821
    for minor in used_minors:
1822
      test = minor not in node_drbd
1823
      _ErrorIf(test, self.ENODEDRBD, node,
1824
               "unallocated drbd minor %d is in use", minor)
1825

    
1826
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1827
    """Builds the node OS structures.
1828

1829
    @type ninfo: L{objects.Node}
1830
    @param ninfo: the node to check
1831
    @param nresult: the remote results for the node
1832
    @param nimg: the node image object
1833

1834
    """
1835
    node = ninfo.name
1836
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1837

    
1838
    remote_os = nresult.get(constants.NV_OSLIST, None)
1839
    test = (not isinstance(remote_os, list) or
1840
            not compat.all(isinstance(v, list) and len(v) == 7
1841
                           for v in remote_os))
1842

    
1843
    _ErrorIf(test, self.ENODEOS, node,
1844
             "node hasn't returned valid OS data")
1845

    
1846
    nimg.os_fail = test
1847

    
1848
    if test:
1849
      return
1850

    
1851
    os_dict = {}
1852

    
1853
    for (name, os_path, status, diagnose,
1854
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1855

    
1856
      if name not in os_dict:
1857
        os_dict[name] = []
1858

    
1859
      # parameters is a list of lists instead of list of tuples due to
1860
      # JSON lacking a real tuple type, fix it:
1861
      parameters = [tuple(v) for v in parameters]
1862
      os_dict[name].append((os_path, status, diagnose,
1863
                            set(variants), set(parameters), set(api_ver)))
1864

    
1865
    nimg.oslist = os_dict
1866

    
1867
  def _VerifyNodeOS(self, ninfo, nimg, base):
1868
    """Verifies the node OS list.
1869

1870
    @type ninfo: L{objects.Node}
1871
    @param ninfo: the node to check
1872
    @param nimg: the node image object
1873
    @param base: the 'template' node we match against (e.g. from the master)
1874

1875
    """
1876
    node = ninfo.name
1877
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1878

    
1879
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1880

    
1881
    for os_name, os_data in nimg.oslist.items():
1882
      assert os_data, "Empty OS status for OS %s?!" % os_name
1883
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1884
      _ErrorIf(not f_status, self.ENODEOS, node,
1885
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1886
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1887
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1888
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1889
      # this will be caught in the backend too
1890
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1891
               and not f_var, self.ENODEOS, node,
1892
               "OS %s with API at least %d does not declare any variant",
1893
               os_name, constants.OS_API_V15)
1894
      # comparisons with the 'base' image
1895
      test = os_name not in base.oslist
1896
      _ErrorIf(test, self.ENODEOS, node,
1897
               "Extra OS %s not present on reference node (%s)",
1898
               os_name, base.name)
1899
      if test:
1900
        continue
1901
      assert base.oslist[os_name], "Base node has empty OS status?"
1902
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1903
      if not b_status:
1904
        # base OS is invalid, skipping
1905
        continue
1906
      for kind, a, b in [("API version", f_api, b_api),
1907
                         ("variants list", f_var, b_var),
1908
                         ("parameters", f_param, b_param)]:
1909
        _ErrorIf(a != b, self.ENODEOS, node,
1910
                 "OS %s %s differs from reference node %s: %s vs. %s",
1911
                 kind, os_name, base.name,
1912
                 utils.CommaJoin(a), utils.CommaJoin(b))
1913

    
1914
    # check any missing OSes
1915
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1916
    _ErrorIf(missing, self.ENODEOS, node,
1917
             "OSes present on reference node %s but missing on this node: %s",
1918
             base.name, utils.CommaJoin(missing))
1919

    
1920
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1921
    """Verifies and updates the node volume data.
1922

1923
    This function will update a L{NodeImage}'s internal structures
1924
    with data from the remote call.
1925

1926
    @type ninfo: L{objects.Node}
1927
    @param ninfo: the node to check
1928
    @param nresult: the remote results for the node
1929
    @param nimg: the node image object
1930
    @param vg_name: the configured VG name
1931

1932
    """
1933
    node = ninfo.name
1934
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1935

    
1936
    nimg.lvm_fail = True
1937
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1938
    if vg_name is None:
1939
      pass
1940
    elif isinstance(lvdata, basestring):
1941
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1942
               utils.SafeEncode(lvdata))
1943
    elif not isinstance(lvdata, dict):
1944
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1945
    else:
1946
      nimg.volumes = lvdata
1947
      nimg.lvm_fail = False
1948

    
1949
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1950
    """Verifies and updates the node instance list.
1951

1952
    If the listing was successful, then updates this node's instance
1953
    list. Otherwise, it marks the RPC call as failed for the instance
1954
    list key.
1955

1956
    @type ninfo: L{objects.Node}
1957
    @param ninfo: the node to check
1958
    @param nresult: the remote results for the node
1959
    @param nimg: the node image object
1960

1961
    """
1962
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1963
    test = not isinstance(idata, list)
1964
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1965
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1966
    if test:
1967
      nimg.hyp_fail = True
1968
    else:
1969
      nimg.instances = idata
1970

    
1971
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1972
    """Verifies and computes a node information map
1973

1974
    @type ninfo: L{objects.Node}
1975
    @param ninfo: the node to check
1976
    @param nresult: the remote results for the node
1977
    @param nimg: the node image object
1978
    @param vg_name: the configured VG name
1979

1980
    """
1981
    node = ninfo.name
1982
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1983

    
1984
    # try to read free memory (from the hypervisor)
1985
    hv_info = nresult.get(constants.NV_HVINFO, None)
1986
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1987
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1988
    if not test:
1989
      try:
1990
        nimg.mfree = int(hv_info["memory_free"])
1991
      except (ValueError, TypeError):
1992
        _ErrorIf(True, self.ENODERPC, node,
1993
                 "node returned invalid nodeinfo, check hypervisor")
1994

    
1995
    # FIXME: devise a free space model for file based instances as well
1996
    if vg_name is not None:
1997
      test = (constants.NV_VGLIST not in nresult or
1998
              vg_name not in nresult[constants.NV_VGLIST])
1999
      _ErrorIf(test, self.ENODELVM, node,
2000
               "node didn't return data for the volume group '%s'"
2001
               " - it is either missing or broken", vg_name)
2002
      if not test:
2003
        try:
2004
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2005
        except (ValueError, TypeError):
2006
          _ErrorIf(True, self.ENODERPC, node,
2007
                   "node returned invalid LVM info, check LVM status")
2008

    
2009
  def BuildHooksEnv(self):
2010
    """Build hooks env.
2011

2012
    Cluster-Verify hooks are run only in the post phase; a failing hook has
    its output logged in the verify output and makes the verification fail.
2014

2015
    """
2016
    all_nodes = self.cfg.GetNodeList()
2017
    env = {
2018
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2019
      }
2020
    for node in self.cfg.GetAllNodesInfo().values():
2021
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2022

    
2023
    return env, [], all_nodes
2024

    
2025
  def Exec(self, feedback_fn):
2026
    """Verify integrity of cluster, performing various test on nodes.
2027

2028
    """
2029
    self.bad = False
2030
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2031
    verbose = self.op.verbose
2032
    self._feedback_fn = feedback_fn
2033
    feedback_fn("* Verifying global settings")
2034
    for msg in self.cfg.VerifyConfig():
2035
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2036

    
2037
    # Check the cluster certificates
2038
    for cert_filename in constants.ALL_CERT_FILES:
2039
      (errcode, msg) = _VerifyCertificate(cert_filename)
2040
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2041

    
2042
    vg_name = self.cfg.GetVGName()
2043
    drbd_helper = self.cfg.GetDRBDHelper()
2044
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2045
    cluster = self.cfg.GetClusterInfo()
2046
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2047
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2048
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2049
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2050
                        for iname in instancelist)
2051
    i_non_redundant = [] # Non redundant instances
2052
    i_non_a_balanced = [] # Non auto-balanced instances
2053
    n_offline = 0 # Count of offline nodes
2054
    n_drained = 0 # Count of nodes being drained
2055
    node_vol_should = {}
2056

    
2057
    # FIXME: verify OS list
2058
    # do local checksums
2059
    master_files = [constants.CLUSTER_CONF_FILE]
2060
    master_node = self.master_node = self.cfg.GetMasterNode()
2061
    master_ip = self.cfg.GetMasterIP()
2062

    
2063
    file_names = ssconf.SimpleStore().GetFileList()
2064
    file_names.extend(constants.ALL_CERT_FILES)
2065
    file_names.extend(master_files)
2066
    if cluster.modify_etc_hosts:
2067
      file_names.append(constants.ETC_HOSTS)
2068

    
2069
    local_checksums = utils.FingerprintFiles(file_names)
2070

    
2071
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2072
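    # the set of checks each node should run and report back via the
    # node-verify RPC call below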
    node_verify_param = {
2073
      constants.NV_FILELIST: file_names,
2074
      constants.NV_NODELIST: [node.name for node in nodeinfo
2075
                              if not node.offline],
2076
      constants.NV_HYPERVISOR: hypervisors,
2077
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2078
                                  node.secondary_ip) for node in nodeinfo
2079
                                 if not node.offline],
2080
      constants.NV_INSTANCELIST: hypervisors,
2081
      constants.NV_VERSION: None,
2082
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2083
      constants.NV_NODESETUP: None,
2084
      constants.NV_TIME: None,
2085
      constants.NV_MASTERIP: (master_node, master_ip),
2086
      constants.NV_OSLIST: None,
2087
      }
2088

    
2089
    if vg_name is not None:
2090
      node_verify_param[constants.NV_VGLIST] = None
2091
      node_verify_param[constants.NV_LVLIST] = vg_name
2092
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2093
      node_verify_param[constants.NV_DRBDLIST] = None
2094

    
2095
    if drbd_helper:
2096
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2097

    
2098
    # Build our expected cluster state
2099
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2100
                                                 name=node.name))
2101
                      for node in nodeinfo)
2102

    
2103
    for instance in instancelist:
2104
      inst_config = instanceinfo[instance]
2105

    
2106
      for nname in inst_config.all_nodes:
2107
        if nname not in node_image:
2108
          # ghost node
2109
          gnode = self.NodeImage(name=nname)
2110
          gnode.ghost = True
2111
          node_image[nname] = gnode
2112

    
2113
      inst_config.MapLVsByNode(node_vol_should)
2114

    
2115
      pnode = inst_config.primary_node
2116
      node_image[pnode].pinst.append(instance)
2117

    
2118
      for snode in inst_config.secondary_nodes:
2119
        nimg = node_image[snode]
2120
        nimg.sinst.append(instance)
2121
        if pnode not in nimg.sbp:
2122
          nimg.sbp[pnode] = []
2123
        nimg.sbp[pnode].append(instance)
2124

    
2125
    # At this point, we have the in-memory data structures complete,
2126
    # except for the runtime information, which we'll gather next
2127

    
2128
    # Due to the way our RPC system works, exact response times cannot be
2129
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2130
    # time before and after executing the request, we can at least have a time
2131
    # window.
2132
    nvinfo_starttime = time.time()
2133
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2134
                                           self.cfg.GetClusterName())
2135
    nvinfo_endtime = time.time()
2136

    
2137
    all_drbd_map = self.cfg.ComputeDRBDMap()
2138

    
2139
    feedback_fn("* Verifying node status")
2140

    
2141
    refos_img = None
2142

    
2143
    for node_i in nodeinfo:
2144
      node = node_i.name
2145
      nimg = node_image[node]
2146

    
2147
      if node_i.offline:
2148
        if verbose:
2149
          feedback_fn("* Skipping offline node %s" % (node,))
2150
        n_offline += 1
2151
        continue
2152

    
2153
      if node == master_node:
2154
        ntype = "master"
2155
      elif node_i.master_candidate:
2156
        ntype = "master candidate"
2157
      elif node_i.drained:
2158
        ntype = "drained"
2159
        n_drained += 1
2160
      else:
2161
        ntype = "regular"
2162
      if verbose:
2163
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2164

    
2165
      msg = all_nvinfo[node].fail_msg
2166
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2167
      if msg:
2168
        nimg.rpc_fail = True
2169
        continue
2170

    
2171
      nresult = all_nvinfo[node].payload
2172

    
2173
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2174
      self._VerifyNodeNetwork(node_i, nresult)
2175
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2176
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2177
                            master_files)
2178
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2179
                           all_drbd_map)
2180
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2181

    
2182
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2183
      self._UpdateNodeInstances(node_i, nresult, nimg)
2184
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2185
      self._UpdateNodeOS(node_i, nresult, nimg)
2186
      if not nimg.os_fail:
2187
        if refos_img is None:
2188
          refos_img = nimg
2189
        self._VerifyNodeOS(node_i, nimg, refos_img)
2190

    
2191
    feedback_fn("* Verifying instance status")
2192
    for instance in instancelist:
2193
      if verbose:
2194
        feedback_fn("* Verifying instance %s" % instance)
2195
      inst_config = instanceinfo[instance]
2196
      self._VerifyInstance(instance, inst_config, node_image)
2197
      inst_nodes_offline = []
2198

    
2199
      pnode = inst_config.primary_node
2200
      pnode_img = node_image[pnode]
2201
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2202
               self.ENODERPC, pnode, "instance %s, connection to"
2203
               " primary node failed", instance)
2204

    
2205
      if pnode_img.offline:
2206
        inst_nodes_offline.append(pnode)
2207

    
2208
      # If the instance is non-redundant we cannot survive losing its primary
2209
      # node, so we are not N+1 compliant. On the other hand we have no disk
2210
      # templates with more than one secondary so that situation is not well
2211
      # supported either.
2212
      # FIXME: does not support file-backed instances
2213
      if not inst_config.secondary_nodes:
2214
        i_non_redundant.append(instance)
2215
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2216
               instance, "instance has multiple secondary nodes: %s",
2217
               utils.CommaJoin(inst_config.secondary_nodes),
2218
               code=self.ETYPE_WARNING)
2219

    
2220
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2221
        i_non_a_balanced.append(instance)
2222

    
2223
      for snode in inst_config.secondary_nodes:
2224
        s_img = node_image[snode]
2225
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2226
                 "instance %s, connection to secondary node failed", instance)
2227

    
2228
        if s_img.offline:
2229
          inst_nodes_offline.append(snode)
2230

    
2231
      # warn that the instance lives on offline nodes
2232
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2233
               "instance lives on offline node(s) %s",
2234
               utils.CommaJoin(inst_nodes_offline))
2235
      # ... or ghost nodes
2236
      for node in inst_config.all_nodes:
2237
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2238
                 "instance lives on ghost node %s", node)
2239

    
2240
    feedback_fn("* Verifying orphan volumes")
2241
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2242
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2243

    
2244
    feedback_fn("* Verifying orphan instances")
2245
    self._VerifyOrphanInstances(instancelist, node_image)
2246

    
2247
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2248
      feedback_fn("* Verifying N+1 Memory redundancy")
2249
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2250

    
2251
    feedback_fn("* Other Notes")
2252
    if i_non_redundant:
2253
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2254
                  % len(i_non_redundant))
2255

    
2256
    if i_non_a_balanced:
2257
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2258
                  % len(i_non_a_balanced))
2259

    
2260
    if n_offline:
2261
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2262

    
2263
    if n_drained:
2264
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2265

    
2266
    return not self.bad
2267

    
2268
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2269
    """Analyze the post-hooks' result
2270

2271
    This method analyses the hook result, handles it, and sends some
2272
    nicely-formatted feedback back to the user.
2273

2274
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2275
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2276
    @param hooks_results: the results of the multi-node hooks rpc call
2277
    @param feedback_fn: function used send feedback back to the caller
2278
    @param lu_result: previous Exec result
2279
    @return: the new Exec result, based on the previous result
2280
        and hook results
2281

2282
    """
2283
    # We only really run POST phase hooks, and are only interested in
2284
    # their results
2285
    if phase == constants.HOOKS_PHASE_POST:
2286
      # Used to change hooks' output to proper indentation
2287
      indent_re = re.compile('^', re.M)
2288
      feedback_fn("* Hooks Results")
2289
      assert hooks_results, "invalid result from hooks"
2290

    
2291
      for node_name in hooks_results:
2292
        res = hooks_results[node_name]
2293
        msg = res.fail_msg
2294
        test = msg and not res.offline
2295
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2296
                      "Communication failure in hooks execution: %s", msg)
2297
        if res.offline or msg:
2298
          # No need to investigate payload if node is offline or gave an error.
2299
          # override manually lu_result here as _ErrorIf only
2300
          # overrides self.bad
2301
          lu_result = 1
2302
          continue
2303
        for script, hkr, output in res.payload:
2304
          test = hkr == constants.HKR_FAIL
2305
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2306
                        "Script %s failed, output:", script)
2307
          if test:
2308
            output = indent_re.sub('      ', output)
2309
            feedback_fn("%s" % output)
2310
            lu_result = 0
2311

    
2312
      return lu_result
2313

    
2314

    
2315
class LUVerifyDisks(NoHooksLU):
2316
  """Verifies the cluster disks status.
2317

2318
  """
2319
  REQ_BGL = False
2320

    
2321
  def ExpandNames(self):
2322
    self.needed_locks = {
2323
      locking.LEVEL_NODE: locking.ALL_SET,
2324
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2325
    }
2326
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2327

    
2328
  def Exec(self, feedback_fn):
2329
    """Verify integrity of cluster disks.
2330

2331
    @rtype: tuple of three items
2332
    @return: a tuple of (dict of node-to-node_error, list of instances
2333
        which need activate-disks, dict of instance: (node, volume) for
2334
        missing volumes)
2335

2336
    """
2337
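    # 'result' references the three structures below; they are filled in
    # place and returned together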
    result = res_nodes, res_instances, res_missing = {}, [], {}
2338

    
2339
    vg_name = self.cfg.GetVGName()
2340
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2341
    instances = [self.cfg.GetInstanceInfo(name)
2342
                 for name in self.cfg.GetInstanceList()]
2343

    
2344
    nv_dict = {}
2345
    for inst in instances:
2346
      inst_lvs = {}
2347
      if (not inst.admin_up or
2348
          inst.disk_template not in constants.DTS_NET_MIRROR):
2349
        continue
2350
      inst.MapLVsByNode(inst_lvs)
2351
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2352
      for node, vol_list in inst_lvs.iteritems():
2353
        for vol in vol_list:
2354
          nv_dict[(node, vol)] = inst
2355

    
2356
    if not nv_dict:
2357
      return result
2358

    
2359
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2360

    
2361
    for node in nodes:
2362
      # node_volume
2363
      node_res = node_lvs[node]
2364
      if node_res.offline:
2365
        continue
2366
      msg = node_res.fail_msg
2367
      if msg:
2368
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2369
        res_nodes[node] = msg
2370
        continue
2371

    
2372
      lvs = node_res.payload
2373
      for lv_name, (_, _, lv_online) in lvs.items():
2374
        inst = nv_dict.pop((node, lv_name), None)
2375
        if (not lv_online and inst is not None
2376
            and inst.name not in res_instances):
2377
          res_instances.append(inst.name)
2378

    
2379
    # any leftover items in nv_dict are missing LVs, let's arrange the
2380
    # data better
2381
    for key, inst in nv_dict.iteritems():
2382
      if inst.name not in res_missing:
2383
        res_missing[inst.name] = []
2384
      res_missing[inst.name].append(key)
2385

    
2386
    return result
2387

    
2388

    
2389
class LURepairDiskSizes(NoHooksLU):
2390
  """Verifies the cluster disks sizes.
2391

2392
  """
2393
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2394
  REQ_BGL = False
2395

    
2396
  def ExpandNames(self):
2397
    if self.op.instances:
2398
      self.wanted_names = []
2399
      for name in self.op.instances:
2400
        full_name = _ExpandInstanceName(self.cfg, name)
2401
        self.wanted_names.append(full_name)
2402
      self.needed_locks = {
2403
        locking.LEVEL_NODE: [],
2404
        locking.LEVEL_INSTANCE: self.wanted_names,
2405
        }
2406
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2407
    else:
2408
      self.wanted_names = None
2409
      self.needed_locks = {
2410
        locking.LEVEL_NODE: locking.ALL_SET,
2411
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2412
        }
2413
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2414

    
2415
  def DeclareLocks(self, level):
2416
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2417
      self._LockInstancesNodes(primary_only=True)
2418

    
2419
  def CheckPrereq(self):
2420
    """Check prerequisites.
2421

2422
    This only checks the optional instance list against the existing names.
2423

2424
    """
2425
    if self.wanted_names is None:
2426
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2427

    
2428
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2429
                             in self.wanted_names]
2430

    
2431
  def _EnsureChildSizes(self, disk):
2432
    """Ensure children of the disk have the needed disk size.
2433

2434
    This is valid mainly for DRBD8 and fixes an issue where the
2435
    children have smaller disk size.
2436

2437
    @param disk: an L{ganeti.objects.Disk} object
2438

2439
    """
2440
    if disk.dev_type == constants.LD_DRBD8:
2441
      assert disk.children, "Empty children for DRBD8?"
2442
      fchild = disk.children[0]
2443
      mismatch = fchild.size < disk.size
2444
      if mismatch:
2445
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2446
                     fchild.size, disk.size)
2447
        fchild.size = disk.size
2448

    
2449
      # and we recurse on this child only, not on the metadev
2450
      return self._EnsureChildSizes(fchild) or mismatch
2451
    else:
2452
      return False
2453

    
2454
  def Exec(self, feedback_fn):
2455
    """Verify the size of cluster disks.
2456

2457
    """
2458
    # TODO: check child disks too
2459
    # TODO: check differences in size between primary/secondary nodes
2460
    per_node_disks = {}
2461
    for instance in self.wanted_instances:
2462
      pnode = instance.primary_node
2463
      if pnode not in per_node_disks:
2464
        per_node_disks[pnode] = []
2465
      for idx, disk in enumerate(instance.disks):
2466
        per_node_disks[pnode].append((instance, idx, disk))
2467

    
2468
    changed = []
2469
    for node, dskl in per_node_disks.items():
2470
      newl = [v[2].Copy() for v in dskl]
2471
      for dsk in newl:
2472
        self.cfg.SetDiskID(dsk, node)
2473
      result = self.rpc.call_blockdev_getsizes(node, newl)
2474
      if result.fail_msg:
2475
        self.LogWarning("Failure in blockdev_getsizes call to node"
2476
                        " %s, ignoring", node)
2477
        continue
2478
      if len(result.data) != len(dskl):
2479
        self.LogWarning("Invalid result from node %s, ignoring node results",
2480
                        node)
2481
        continue
2482
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2483
        if size is None:
2484
          self.LogWarning("Disk %d of instance %s did not return size"
2485
                          " information, ignoring", idx, instance.name)
2486
          continue
2487
        if not isinstance(size, (int, long)):
2488
          self.LogWarning("Disk %d of instance %s did not return valid"
2489
                          " size information, ignoring", idx, instance.name)
2490
          continue
2491
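        # the reported size is in bytes; shift by 20 to get MiB, the unit
        # used by disk.size in the configuration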
        size = size >> 20
2492
        if size != disk.size:
2493
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2494
                       " correcting: recorded %d, actual %d", idx,
2495
                       instance.name, disk.size, size)
2496
          disk.size = size
2497
          self.cfg.Update(instance, feedback_fn)
2498
          changed.append((instance.name, idx, size))
2499
        if self._EnsureChildSizes(disk):
2500
          self.cfg.Update(instance, feedback_fn)
2501
          changed.append((instance.name, idx, disk.size))
2502
    return changed
2503

    
2504

    
2505
class LURenameCluster(LogicalUnit):
2506
  """Rename the cluster.
2507

2508
  """
2509
  HPATH = "cluster-rename"
2510
  HTYPE = constants.HTYPE_CLUSTER
2511
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2512

    
2513
  def BuildHooksEnv(self):
2514
    """Build hooks env.
2515

2516
    """
2517
    env = {
2518
      "OP_TARGET": self.cfg.GetClusterName(),
2519
      "NEW_NAME": self.op.name,
2520
      }
2521
    mn = self.cfg.GetMasterNode()
2522
    all_nodes = self.cfg.GetNodeList()
2523
    return env, [mn], all_nodes
2524

    
2525
  def CheckPrereq(self):
2526
    """Verify that the passed name is a valid one.
2527

2528
    """
2529
    hostname = netutils.GetHostInfo(self.op.name)
2530

    
2531
    new_name = hostname.name
2532
    self.ip = new_ip = hostname.ip
2533
    old_name = self.cfg.GetClusterName()
2534
    old_ip = self.cfg.GetMasterIP()
2535
    if new_name == old_name and new_ip == old_ip:
2536
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2537
                                 " cluster has changed",
2538
                                 errors.ECODE_INVAL)
2539
    if new_ip != old_ip:
2540
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2541
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2542
                                   " reachable on the network. Aborting." %
2543
                                   new_ip, errors.ECODE_NOTUNIQUE)
2544

    
2545
    self.op.name = new_name
2546

    
2547
  def Exec(self, feedback_fn):
2548
    """Rename the cluster.
2549

2550
    """
2551
    clustername = self.op.name
2552
    ip = self.ip
2553

    
2554
    # shutdown the master IP
2555
    master = self.cfg.GetMasterNode()
2556
    result = self.rpc.call_node_stop_master(master, False)
2557
    result.Raise("Could not disable the master role")
2558

    
2559
    try:
2560
      cluster = self.cfg.GetClusterInfo()
2561
      cluster.cluster_name = clustername
2562
      cluster.master_ip = ip
2563
      self.cfg.Update(cluster, feedback_fn)
2564

    
2565
      # update the known hosts file
2566
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2567
      node_list = self.cfg.GetNodeList()
2568
      try:
2569
        node_list.remove(master)
2570
      except ValueError:
2571
        pass
2572
      result = self.rpc.call_upload_file(node_list,
2573
                                         constants.SSH_KNOWN_HOSTS_FILE)
2574
      for to_node, to_result in result.iteritems():
2575
        msg = to_result.fail_msg
2576
        if msg:
2577
          msg = ("Copy of file %s to node %s failed: %s" %
2578
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2579
          self.proc.LogWarning(msg)
2580

    
2581
    finally:
2582
      result = self.rpc.call_node_start_master(master, False, False)
2583
      msg = result.fail_msg
2584
      if msg:
2585
        self.LogWarning("Could not re-enable the master role on"
2586
                        " the master, please restart manually: %s", msg)
2587

    
2588
    return clustername
2589

    
2590

    
2591
class LUSetClusterParams(LogicalUnit):
2592
  """Change the parameters of the cluster.
2593

2594
  """
2595
  HPATH = "cluster-modify"
2596
  HTYPE = constants.HTYPE_CLUSTER
2597
  _OP_PARAMS = [
2598
    ("vg_name", None, _TMaybeString),
2599
    ("enabled_hypervisors", None,
2600
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2601
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2602
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2603
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2604
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2605
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2606
    ("uid_pool", None, _NoType),
2607
    ("add_uids", None, _NoType),
2608
    ("remove_uids", None, _NoType),
2609
    ("maintain_node_health", None, _TMaybeBool),
2610
    ("nicparams", None, _TOr(_TDict, _TNone)),
2611
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2612
    ("default_iallocator", None, _TMaybeString),
2613
    ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2614
    ]
2615
  REQ_BGL = False
2616

    
2617
  def CheckArguments(self):
2618
    """Check parameters
2619

2620
    """
2621
    if self.op.uid_pool:
2622
      uidpool.CheckUidPool(self.op.uid_pool)
2623

    
2624
    if self.op.add_uids:
2625
      uidpool.CheckUidPool(self.op.add_uids)
2626

    
2627
    if self.op.remove_uids:
2628
      uidpool.CheckUidPool(self.op.remove_uids)
2629

    
2630
  def ExpandNames(self):
2631
    # FIXME: in the future maybe other cluster params won't require checking on
2632
    # all nodes to be modified.
2633
    self.needed_locks = {
2634
      locking.LEVEL_NODE: locking.ALL_SET,
2635
    }
2636
    self.share_locks[locking.LEVEL_NODE] = 1
2637

    
2638
  def BuildHooksEnv(self):
2639
    """Build hooks env.
2640

2641
    """
2642
    env = {
2643
      "OP_TARGET": self.cfg.GetClusterName(),
2644
      "NEW_VG_NAME": self.op.vg_name,
2645
      }
2646
    mn = self.cfg.GetMasterNode()
2647
    return env, [mn], [mn]
2648

    
2649
  def CheckPrereq(self):
2650
    """Check prerequisites.
2651

2652
    This checks whether the given params don't conflict and
2653
    if the given volume group is valid.
2654

2655
    """
2656
    if self.op.vg_name is not None and not self.op.vg_name:
2657
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2658
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2659
                                   " instances exist", errors.ECODE_INVAL)
2660

    
2661
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2662
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2663
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2664
                                   " drbd-based instances exist",
2665
                                   errors.ECODE_INVAL)
2666

    
2667
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2668

    
2669
    # if vg_name not None, checks given volume group on all nodes
2670
    if self.op.vg_name:
2671
      vglist = self.rpc.call_vg_list(node_list)
2672
      for node in node_list:
2673
        msg = vglist[node].fail_msg
2674
        if msg:
2675
          # ignoring down node
2676
          self.LogWarning("Error while gathering data on node %s"
2677
                          " (ignoring node): %s", node, msg)
2678
          continue
2679
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2680
                                              self.op.vg_name,
2681
                                              constants.MIN_VG_SIZE)
2682
        if vgstatus:
2683
          raise errors.OpPrereqError("Error on node '%s': %s" %
2684
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2685

    
2686
    if self.op.drbd_helper:
2687
      # checks given drbd helper on all nodes
2688
      helpers = self.rpc.call_drbd_helper(node_list)
2689
      for node in node_list:
2690
        ninfo = self.cfg.GetNodeInfo(node)
2691
        if ninfo.offline:
2692
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2693
          continue
2694
        msg = helpers[node].fail_msg
2695
        if msg:
2696
          raise errors.OpPrereqError("Error checking drbd helper on node"
2697
                                     " '%s': %s" % (node, msg),
2698
                                     errors.ECODE_ENVIRON)
2699
        node_helper = helpers[node].payload
2700
        if node_helper != self.op.drbd_helper:
2701
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2702
                                     (node, node_helper), errors.ECODE_ENVIRON)
2703

    
2704
    self.cluster = cluster = self.cfg.GetClusterInfo()
2705
    # validate params changes
2706
    if self.op.beparams:
2707
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2708
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2709

    
2710
    if self.op.nicparams:
2711
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2712
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2713
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2714
      nic_errors = []
2715

    
2716
      # check all instances for consistency
2717
      for instance in self.cfg.GetAllInstancesInfo().values():
2718
        for nic_idx, nic in enumerate(instance.nics):
2719
          params_copy = copy.deepcopy(nic.nicparams)
2720
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2721

    
2722
          # check parameter syntax
2723
          try:
2724
            objects.NIC.CheckParameterSyntax(params_filled)
2725
          except errors.ConfigurationError, err:
2726
            nic_errors.append("Instance %s, nic/%d: %s" %
2727
                              (instance.name, nic_idx, err))
2728

    
2729
          # if we're moving instances to routed, check that they have an ip
2730
          target_mode = params_filled[constants.NIC_MODE]
2731
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2732
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2733
                              (instance.name, nic_idx))
2734
      if nic_errors:
2735
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2736
                                   "\n".join(nic_errors))
2737

    
2738
    # hypervisor list/parameters
2739
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2740
    if self.op.hvparams:
2741
      for hv_name, hv_dict in self.op.hvparams.items():
2742
        if hv_name not in self.new_hvparams:
2743
          self.new_hvparams[hv_name] = hv_dict
2744
        else:
2745
          self.new_hvparams[hv_name].update(hv_dict)
2746

    
2747
    # os hypervisor parameters
2748
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2749
    if self.op.os_hvp:
2750
      for os_name, hvs in self.op.os_hvp.items():
2751
        if os_name not in self.new_os_hvp:
2752
          self.new_os_hvp[os_name] = hvs
2753
        else:
2754
          for hv_name, hv_dict in hvs.items():
2755
            if hv_name not in self.new_os_hvp[os_name]:
2756
              self.new_os_hvp[os_name][hv_name] = hv_dict
2757
            else:
2758
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2759

    
2760
    # os parameters
2761
    self.new_osp = objects.FillDict(cluster.osparams, {})
2762
    if self.op.osparams:
2763
      for os_name, osp in self.op.osparams.items():
2764
        if os_name not in self.new_osp:
2765
          self.new_osp[os_name] = {}
2766

    
2767
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2768
                                                  use_none=True)
2769

    
2770
        if not self.new_osp[os_name]:
2771
          # we removed all parameters
2772
          del self.new_osp[os_name]
2773
        else:
2774
          # check the parameter validity (remote check)
2775
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2776
                         os_name, self.new_osp[os_name])
2777

    
2778
    # changes to the hypervisor list
2779
    if self.op.enabled_hypervisors is not None:
2780
      self.hv_list = self.op.enabled_hypervisors
2781
      for hv in self.hv_list:
2782
        # if the hypervisor doesn't already exist in the cluster
2783
        # hvparams, we initialize it to empty, and then (in both
2784
        # cases) we make sure to fill the defaults, as we might not
2785
        # have a complete defaults list if the hypervisor wasn't
2786
        # enabled before
2787
        if hv not in new_hvp:
2788
          new_hvp[hv] = {}
2789
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2790
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2791
    else:
2792
      self.hv_list = cluster.enabled_hypervisors
2793

    
2794
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2795
      # either the enabled list has changed, or the parameters have, validate
2796
      for hv_name, hv_params in self.new_hvparams.items():
2797
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2798
            (self.op.enabled_hypervisors and
2799
             hv_name in self.op.enabled_hypervisors)):
2800
          # either this is a new hypervisor, or its parameters have changed
2801
          hv_class = hypervisor.GetHypervisor(hv_name)
2802
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    self.cfg.Update(self.cluster, feedback_fn)


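# Note on the os_hvp validation in LUSetClusterParams.CheckPrereq above: the
# per-OS hypervisor overrides are checked on top of the cluster-level
# defaults via objects.FillDict(cluster_defaults, hv_params).  Illustrative
# sketch only (the parameter values below are made up, not part of this
# module): with cluster_defaults = {"root_path": "/dev/sda1",
# "kernel_args": "ro"} and hv_params = {"root_path": "/dev/vda1"}, the
# checked result would be {"root_path": "/dev/vda1", "kernel_args": "ro"},
# i.e. the override wins and unspecified keys keep their cluster defaults.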
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


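# Usage note for _RedistributeAncillaryFiles: LURedistributeConfig below
# calls it with no extra nodes, while LUAddNode.Exec (further down in this
# module) passes the freshly added node via
# _RedistributeAncillaryFiles(self, additional_nodes=[node]).  Only files
# that actually exist on the master are uploaded, so optional files such as
# the RAPI users file are silently skipped when absent.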
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


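# Typical call pattern for _WaitForSync, as used by the instance-creation and
# disk-replacement code elsewhere in this module (illustrative sketch only):
#
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     ...  # warn the user that the mirrors are still degraded
#
# A False return value means at least one mirror was still degraded when
# polling stopped; repeated RPC failures towards the primary node (ten in a
# row) raise errors.RemoteError instead.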
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


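# _CheckDiskConsistency recurses into dev.children, so for a DRBD device
# backed by logical volumes both the DRBD level and the underlying LVs are
# checked.  With ldisk=True the test uses only the local-disk status
# (constants.LDS_OKAY) rather than the overall is_degraded flag, which is
# what callers such as the replace-disks code rely on.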
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


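# Example of the "keep consistency" intersection in LUDiagnoseOS.Exec above
# (hypothetical node data, not taken from a real cluster): if node1 reports
# the variants {"squeeze", "wheezy"} and node2 reports {"wheezy", "sid"},
# the OS is listed with variants == ["wheezy"] only; the same intersection
# is applied to parameters and API versions.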
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


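# The one-letter "role" values computed in LUQueryNodes.Exec above map as
# follows: "M" master, "C" master candidate, "D" drained, "O" offline and
# "R" regular node; the first matching state in that order wins.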
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("name", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("changes", _NoDefault, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, _NoType),
    ("secondary_ip", None, _TMaybeString),
    ("readd", False, _TBool),
    ]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = netutils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      self.op.secondary_ip = primary_ip
    if not netutils.IsValidIP4(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


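# Note on the single/dual-homed check in LUAddNode.CheckPrereq above: a node
# counts as single-homed when its secondary IP equals its primary IP, and a
# new node must match the master's layout.  Hypothetical example (addresses
# are illustrative only): if the master uses primary 192.0.2.10 and
# secondary 198.51.100.10, adding a node without an explicit secondary IP is
# rejected, because that node would be single-homed on a dual-homed cluster.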
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, _TMaybeBool),
    ("offline", None, _TMaybeBool),
    ("drained", None, _TMaybeBool),
    ("auto_promote", False, _TBool),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


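# Summary of the flag handling in LUSetNodeParams above: at least one of
# offline/drained/master_candidate must be given, at most one of them may be
# set to True in a single call, and offlining or draining a master candidate
# auto-demotes it (recorded in the returned result list).  When auto_promote
# is set, all node locks are taken so that _AdjustCandidatePool can promote
# another node in its place.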
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4238
                           ignore_size=False):
4239
  """Prepare the block devices for an instance.
4240

4241
  This sets up the block devices on all nodes.
4242

4243
  @type lu: L{LogicalUnit}
4244
  @param lu: the logical unit on whose behalf we execute
4245
  @type instance: L{objects.Instance}
4246
  @param instance: the instance for whose disks we assemble
4247
  @type disks: list of L{objects.Disk} or None
4248
  @param disks: which disks to assemble (or all, if None)
4249
  @type ignore_secondaries: boolean
4250
  @param ignore_secondaries: if true, errors on secondary nodes
4251
      won't result in an error return from the function
4252
  @type ignore_size: boolean
4253
  @param ignore_size: if true, the current known size of the disk
4254
      will not be used during the disk activation, useful for cases
4255
      when the size is wrong
4256
  @return: False if the operation failed, otherwise a list of
4257
      (host, instance_visible_name, node_visible_name)
4258
      with the mapping from node devices to instance devices
4259

4260
  """
4261
  device_info = []
4262
  disks_ok = True
4263
  iname = instance.name
4264
  disks = _ExpandCheckDisks(instance, disks)
4265

    
4266
  # With the two passes mechanism we try to reduce the window of
4267
  # opportunity for the race condition of switching DRBD to primary
4268
  # before handshaking occured, but we do not eliminate it
4269

    
4270
  # The proper fix would be to wait (with some limits) until the
4271
  # connection has been made and drbd transitions from WFConnection
4272
  # into any other network-connected state (Connected, SyncTarget,
4273
  # SyncSource, etc.)
4274

    
4275
  # 1st pass, assemble on all nodes in secondary mode
4276
  for inst_disk in disks:
4277
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4278
      if ignore_size:
4279
        node_disk = node_disk.Copy()
4280
        node_disk.UnsetSize()
4281
      lu.cfg.SetDiskID(node_disk, node)
4282
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4283
      msg = result.fail_msg
4284
      if msg:
4285
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4286
                           " (is_primary=False, pass=1): %s",
4287
                           inst_disk.iv_name, node, msg)
4288
        if not ignore_secondaries:
4289
          disks_ok = False
4290

    
4291
  # FIXME: race condition on drbd migration to primary
4292

    
4293
  # 2nd pass, do only the primary node
4294
  for inst_disk in disks:
4295
    dev_path = None
4296

    
4297
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4298
      if node != instance.primary_node:
4299
        continue
4300
      if ignore_size:
4301
        node_disk = node_disk.Copy()
4302
        node_disk.UnsetSize()
4303
      lu.cfg.SetDiskID(node_disk, node)
4304
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4305
      msg = result.fail_msg
4306
      if msg:
4307
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4308
                           " (is_primary=True, pass=2): %s",
4309
                           inst_disk.iv_name, node, msg)
4310
        disks_ok = False
4311
      else:
4312
        dev_path = result.payload
4313

    
4314
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4315

    
4316
  # leave the disks configured for the primary node
4317
  # this is a workaround that would be fixed better by
4318
  # improving the logical/physical id handling
4319
  for disk in disks:
4320
    lu.cfg.SetDiskID(disk, instance.primary_node)
4321

    
4322
  return disks_ok, device_info
4323

    
4324
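# A minimal usage sketch (illustration only, not executed by this module):
# callers such as _StartInstanceDisks below combine the two helpers roughly
# like this, assuming `lu` is a LogicalUnit and `instance` a locked
# objects.Instance:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     logging.debug("Disk %s visible on %s as %s", iv_name, node, dev_path)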

    
4325
def _StartInstanceDisks(lu, instance, force):
4326
  """Start the disks of an instance.
4327

4328
  """
4329
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4330
                                           ignore_secondaries=force)
4331
  if not disks_ok:
4332
    _ShutdownInstanceDisks(lu, instance)
4333
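    # force may also be None (e.g. on the reinstall and rename paths), in
    # which case the --force hint below is intentionally skipped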
    if force is not None and not force:
4334
      lu.proc.LogWarning("", hint="If the message above refers to a"
4335
                         " secondary node,"
4336
                         " you can retry the operation using '--force'.")
4337
    raise errors.OpExecError("Disk consistency error")
4338

    
4339

    
4340
class LUDeactivateInstanceDisks(NoHooksLU):
4341
  """Shutdown an instance's disks.
4342

4343
  """
4344
  _OP_PARAMS = [
4345
    _PInstanceName,
4346
    ]
4347
  REQ_BGL = False
4348

    
4349
  def ExpandNames(self):
4350
    self._ExpandAndLockInstance()
4351
    self.needed_locks[locking.LEVEL_NODE] = []
4352
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4353

    
4354
  def DeclareLocks(self, level):
4355
    if level == locking.LEVEL_NODE:
4356
      self._LockInstancesNodes()
4357

    
4358
  def CheckPrereq(self):
4359
    """Check prerequisites.
4360

4361
    This checks that the instance is in the cluster.
4362

4363
    """
4364
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4365
    assert self.instance is not None, \
4366
      "Cannot retrieve locked instance %s" % self.op.instance_name
4367

    
4368
  def Exec(self, feedback_fn):
4369
    """Deactivate the disks
4370

4371
    """
4372
    instance = self.instance
4373
    _SafeShutdownInstanceDisks(self, instance)
4374

    
4375

    
4376
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4377
  """Shutdown block devices of an instance.
4378

4379
  This function checks that the instance is not running before calling
4380
  _ShutdownInstanceDisks.
4381

4382
  """
4383
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4384
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4385

    
4386

    
4387
def _ExpandCheckDisks(instance, disks):
4388
  """Return the instance disks selected by the disks list
4389

4390
  @type disks: list of L{objects.Disk} or None
4391
  @param disks: selected disks
4392
  @rtype: list of L{objects.Disk}
4393
  @return: selected instance disks to act on
4394

4395
  """
4396
  if disks is None:
4397
    return instance.disks
4398
  else:
4399
    if not set(disks).issubset(instance.disks):
4400
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4401
                                   " target instance")
4402
    return disks
4403

    
4404

    
4405
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4406
  """Shutdown block devices of an instance.
4407

4408
  This does the shutdown on all nodes of the instance.
4409

4410
  If ignore_primary is true, errors on the primary node are
4411
  ignored.
4412

4413
  """
4414
  all_result = True
4415
  disks = _ExpandCheckDisks(instance, disks)
4416

    
4417
  for disk in disks:
4418
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4419
      lu.cfg.SetDiskID(top_disk, node)
4420
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4421
      msg = result.fail_msg
4422
      if msg:
4423
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4424
                      disk.iv_name, node, msg)
4425
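        # failures on secondary nodes always count; failures on the primary
        # node only count when ignore_primary is not set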
        if not ignore_primary or node != instance.primary_node:
4426
          all_result = False
4427
  return all_result
4428

    
4429

    
4430
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4431
  """Checks if a node has enough free memory.
4432

4433
  This function checks if a given node has the needed amount of free
4434
  memory. In case the node has less memory or we cannot get the
4435
  information from the node, this function raises an OpPrereqError
4436
  exception.
4437

4438
  @type lu: C{LogicalUnit}
4439
  @param lu: a logical unit from which we get configuration data
4440
  @type node: C{str}
4441
  @param node: the node to check
4442
  @type reason: C{str}
4443
  @param reason: string to use in the error message
4444
  @type requested: C{int}
4445
  @param requested: the amount of memory in MiB to check for
4446
  @type hypervisor_name: C{str}
4447
  @param hypervisor_name: the hypervisor to ask for memory stats
4448
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4449
      we cannot check the node
4450

4451
  """
4452
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4453
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4454
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4455
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4456
  if not isinstance(free_mem, int):
4457
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4458
                               " was '%s'" % (node, free_mem),
4459
                               errors.ECODE_ENVIRON)
4460
  if requested > free_mem:
4461
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4462
                               " needed %s MiB, available %s MiB" %
4463
                               (node, reason, requested, free_mem),
4464
                               errors.ECODE_NORES)
4465

    
4466

    
4467
def _CheckNodesFreeDisk(lu, nodenames, requested):
4468
  """Checks if nodes have enough free disk space in the default VG.
4469

4470
  This function checks if all given nodes have the needed amount of
4471
  free disk. In case any node has less disk or we cannot get the
4472
  information from the node, this function raises an OpPrereqError
4473
  exception.
4474

4475
  @type lu: C{LogicalUnit}
4476
  @param lu: a logical unit from which we get configuration data
4477
  @type nodenames: C{list}
4478
  @param nodenames: the list of node names to check
4479
  @type requested: C{int}
4480
  @param requested: the amount of disk in MiB to check for
4481
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4482
      we cannot check the node
4483

4484
  """
4485
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4486
                                   lu.cfg.GetHypervisorType())
4487
  for node in nodenames:
4488
    info = nodeinfo[node]
4489
    info.Raise("Cannot get current information from node %s" % node,
4490
               prereq=True, ecode=errors.ECODE_ENVIRON)
4491
    vg_free = info.payload.get("vg_free", None)
4492
    if not isinstance(vg_free, int):
4493
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4494
                                 " result was '%s'" % (node, vg_free),
4495
                                 errors.ECODE_ENVIRON)
4496
    if requested > vg_free:
4497
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4498
                                 " required %d MiB, available %d MiB" %
4499
                                 (node, requested, vg_free),
4500
                                 errors.ECODE_NORES)
4501

    
4502
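# Illustrative sketch only (not used directly here): a LU's CheckPrereq
# typically combines these checks along these lines, where `req_disk_mb`
# stands for a hypothetical amount of disk space to reserve:
#
#   bep = self.cfg.GetClusterInfo().FillBE(instance)
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#   _CheckNodesFreeDisk(self, [instance.primary_node], req_disk_mb)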

    
4503
class LUStartupInstance(LogicalUnit):
4504
  """Starts an instance.
4505

4506
  """
4507
  HPATH = "instance-start"
4508
  HTYPE = constants.HTYPE_INSTANCE
4509
  _OP_PARAMS = [
4510
    _PInstanceName,
4511
    _PForce,
4512
    ("hvparams", _EmptyDict, _TDict),
4513
    ("beparams", _EmptyDict, _TDict),
4514
    ]
4515
  REQ_BGL = False
4516

    
4517
  def CheckArguments(self):
4518
    # extra beparams
4519
    if self.op.beparams:
4520
      # fill the beparams dict
4521
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4522

    
4523
  def ExpandNames(self):
4524
    self._ExpandAndLockInstance()
4525

    
4526
  def BuildHooksEnv(self):
4527
    """Build hooks env.
4528

4529
    This runs on master, primary and secondary nodes of the instance.
4530

4531
    """
4532
    env = {
4533
      "FORCE": self.op.force,
4534
      }
4535
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4536
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4537
    return env, nl, nl
4538

    
4539
  def CheckPrereq(self):
4540
    """Check prerequisites.
4541

4542
    This checks that the instance is in the cluster.
4543

4544
    """
4545
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4546
    assert self.instance is not None, \
4547
      "Cannot retrieve locked instance %s" % self.op.instance_name
4548

    
4549
    # extra hvparams
4550
    if self.op.hvparams:
4551
      # check hypervisor parameter syntax (locally)
4552
      cluster = self.cfg.GetClusterInfo()
4553
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4554
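      # start from the cluster and instance defaults, then layer the one-off
      # overrides supplied with this opcode on top before validating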
      filled_hvp = cluster.FillHV(instance)
4555
      filled_hvp.update(self.op.hvparams)
4556
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4557
      hv_type.CheckParameterSyntax(filled_hvp)
4558
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4559

    
4560
    _CheckNodeOnline(self, instance.primary_node)
4561

    
4562
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4563
    # check bridges existence
4564
    _CheckInstanceBridgesExist(self, instance)
4565

    
4566
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4567
                                              instance.name,
4568
                                              instance.hypervisor)
4569
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4570
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4571
    if not remote_info.payload: # not running already
4572
      _CheckNodeFreeMemory(self, instance.primary_node,
4573
                           "starting instance %s" % instance.name,
4574
                           bep[constants.BE_MEMORY], instance.hypervisor)
4575

    
4576
  def Exec(self, feedback_fn):
4577
    """Start the instance.
4578

4579
    """
4580
    instance = self.instance
4581
    force = self.op.force
4582

    
4583
    self.cfg.MarkInstanceUp(instance.name)
4584

    
4585
    node_current = instance.primary_node
4586

    
4587
    _StartInstanceDisks(self, instance, force)
4588

    
4589
    result = self.rpc.call_instance_start(node_current, instance,
4590
                                          self.op.hvparams, self.op.beparams)
4591
    msg = result.fail_msg
4592
    if msg:
4593
      _ShutdownInstanceDisks(self, instance)
4594
      raise errors.OpExecError("Could not start instance: %s" % msg)
4595

    
4596

    
4597
class LURebootInstance(LogicalUnit):
4598
  """Reboot an instance.
4599

4600
  """
4601
  HPATH = "instance-reboot"
4602
  HTYPE = constants.HTYPE_INSTANCE
4603
  _OP_PARAMS = [
4604
    _PInstanceName,
4605
    ("ignore_secondaries", False, _TBool),
4606
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4607
    _PShutdownTimeout,
4608
    ]
4609
  REQ_BGL = False
4610

    
4611
  def ExpandNames(self):
4612
    self._ExpandAndLockInstance()
4613

    
4614
  def BuildHooksEnv(self):
4615
    """Build hooks env.
4616

4617
    This runs on master, primary and secondary nodes of the instance.
4618

4619
    """
4620
    env = {
4621
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4622
      "REBOOT_TYPE": self.op.reboot_type,
4623
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4624
      }
4625
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4626
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4627
    return env, nl, nl
4628

    
4629
  def CheckPrereq(self):
4630
    """Check prerequisites.
4631

4632
    This checks that the instance is in the cluster.
4633

4634
    """
4635
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4636
    assert self.instance is not None, \
4637
      "Cannot retrieve locked instance %s" % self.op.instance_name
4638

    
4639
    _CheckNodeOnline(self, instance.primary_node)
4640

    
4641
    # check bridges existence
4642
    _CheckInstanceBridgesExist(self, instance)
4643

    
4644
  def Exec(self, feedback_fn):
4645
    """Reboot the instance.
4646

4647
    """
4648
    instance = self.instance
4649
    ignore_secondaries = self.op.ignore_secondaries
4650
    reboot_type = self.op.reboot_type
4651

    
4652
    node_current = instance.primary_node
4653

    
4654
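    # soft and hard reboots are delegated to the hypervisor; a full reboot
    # is emulated below as shutdown + disk cycle + start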
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4655
                       constants.INSTANCE_REBOOT_HARD]:
4656
      for disk in instance.disks:
4657
        self.cfg.SetDiskID(disk, node_current)
4658
      result = self.rpc.call_instance_reboot(node_current, instance,
4659
                                             reboot_type,
4660
                                             self.op.shutdown_timeout)
4661
      result.Raise("Could not reboot instance")
4662
    else:
4663
      result = self.rpc.call_instance_shutdown(node_current, instance,
4664
                                               self.op.shutdown_timeout)
4665
      result.Raise("Could not shutdown instance for full reboot")
4666
      _ShutdownInstanceDisks(self, instance)
4667
      _StartInstanceDisks(self, instance, ignore_secondaries)
4668
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4669
      msg = result.fail_msg
4670
      if msg:
4671
        _ShutdownInstanceDisks(self, instance)
4672
        raise errors.OpExecError("Could not start instance for"
4673
                                 " full reboot: %s" % msg)
4674

    
4675
    self.cfg.MarkInstanceUp(instance.name)
4676

    
4677

    
4678
class LUShutdownInstance(LogicalUnit):
4679
  """Shutdown an instance.
4680

4681
  """
4682
  HPATH = "instance-stop"
4683
  HTYPE = constants.HTYPE_INSTANCE
4684
  _OP_PARAMS = [
4685
    _PInstanceName,
4686
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4687
    ]
4688
  REQ_BGL = False
4689

    
4690
  def ExpandNames(self):
4691
    self._ExpandAndLockInstance()
4692

    
4693
  def BuildHooksEnv(self):
4694
    """Build hooks env.
4695

4696
    This runs on master, primary and secondary nodes of the instance.
4697

4698
    """
4699
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4700
    env["TIMEOUT"] = self.op.timeout
4701
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4702
    return env, nl, nl
4703

    
4704
  def CheckPrereq(self):
4705
    """Check prerequisites.
4706

4707
    This checks that the instance is in the cluster.
4708

4709
    """
4710
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4711
    assert self.instance is not None, \
4712
      "Cannot retrieve locked instance %s" % self.op.instance_name
4713
    _CheckNodeOnline(self, self.instance.primary_node)
4714

    
4715
  def Exec(self, feedback_fn):
4716
    """Shutdown the instance.
4717

4718
    """
4719
    instance = self.instance
4720
    node_current = instance.primary_node
4721
    timeout = self.op.timeout
4722
    self.cfg.MarkInstanceDown(instance.name)
4723
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4724
    msg = result.fail_msg
4725
    if msg:
4726
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4727

    
4728
    _ShutdownInstanceDisks(self, instance)
4729

    
4730

    
4731
class LUReinstallInstance(LogicalUnit):
4732
  """Reinstall an instance.
4733

4734
  """
4735
  HPATH = "instance-reinstall"
4736
  HTYPE = constants.HTYPE_INSTANCE
4737
  _OP_PARAMS = [
4738
    _PInstanceName,
4739
    ("os_type", None, _TMaybeString),
4740
    ("force_variant", False, _TBool),
4741
    ]
4742
  REQ_BGL = False
4743

    
4744
  def ExpandNames(self):
4745
    self._ExpandAndLockInstance()
4746

    
4747
  def BuildHooksEnv(self):
4748
    """Build hooks env.
4749

4750
    This runs on master, primary and secondary nodes of the instance.
4751

4752
    """
4753
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4754
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4755
    return env, nl, nl
4756

    
4757
  def CheckPrereq(self):
4758
    """Check prerequisites.
4759

4760
    This checks that the instance is in the cluster and is not running.
4761

4762
    """
4763
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4764
    assert instance is not None, \
4765
      "Cannot retrieve locked instance %s" % self.op.instance_name
4766
    _CheckNodeOnline(self, instance.primary_node)
4767

    
4768
    if instance.disk_template == constants.DT_DISKLESS:
4769
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4770
                                 self.op.instance_name,
4771
                                 errors.ECODE_INVAL)
4772
    _CheckInstanceDown(self, instance, "cannot reinstall")
4773

    
4774
    if self.op.os_type is not None:
4775
      # OS verification
4776
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4777
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4778

    
4779
    self.instance = instance
4780

    
4781
  def Exec(self, feedback_fn):
4782
    """Reinstall the instance.
4783

4784
    """
4785
    inst = self.instance
4786

    
4787
    if self.op.os_type is not None:
4788
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4789
      inst.os = self.op.os_type
4790
      self.cfg.Update(inst, feedback_fn)
4791

    
4792
    _StartInstanceDisks(self, inst, None)
4793
    try:
4794
      feedback_fn("Running the instance OS create scripts...")
4795
      # FIXME: pass debug option from opcode to backend
4796
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4797
                                             self.op.debug_level)
4798
      result.Raise("Could not install OS for instance %s on node %s" %
4799
                   (inst.name, inst.primary_node))
4800
    finally:
4801
      _ShutdownInstanceDisks(self, inst)
4802

    
4803

    
4804
class LURecreateInstanceDisks(LogicalUnit):
4805
  """Recreate an instance's missing disks.
4806

4807
  """
4808
  HPATH = "instance-recreate-disks"
4809
  HTYPE = constants.HTYPE_INSTANCE
4810
  _OP_PARAMS = [
4811
    _PInstanceName,
4812
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4813
    ]
4814
  REQ_BGL = False
4815

    
4816
  def ExpandNames(self):
4817
    self._ExpandAndLockInstance()
4818

    
4819
  def BuildHooksEnv(self):
4820
    """Build hooks env.
4821

4822
    This runs on master, primary and secondary nodes of the instance.
4823

4824
    """
4825
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4826
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4827
    return env, nl, nl
4828

    
4829
  def CheckPrereq(self):
4830
    """Check prerequisites.
4831

4832
    This checks that the instance is in the cluster and is not running.
4833

4834
    """
4835
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4836
    assert instance is not None, \
4837
      "Cannot retrieve locked instance %s" % self.op.instance_name
4838
    _CheckNodeOnline(self, instance.primary_node)
4839

    
4840
    if instance.disk_template == constants.DT_DISKLESS:
4841
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4842
                                 self.op.instance_name, errors.ECODE_INVAL)
4843
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4844

    
4845
    if not self.op.disks:
4846
      self.op.disks = range(len(instance.disks))
4847
    else:
4848
      for idx in self.op.disks:
4849
        if idx >= len(instance.disks):
4850
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4851
                                     errors.ECODE_INVAL)
4852

    
4853
    self.instance = instance
4854

    
4855
  def Exec(self, feedback_fn):
4856
    """Recreate the disks.
4857

4858
    """
4859
    to_skip = []
4860
    for idx, _ in enumerate(self.instance.disks):
4861
      if idx not in self.op.disks: # disk idx has not been passed in
4862
        to_skip.append(idx)
4863
        continue
4864

    
4865
    _CreateDisks(self, self.instance, to_skip=to_skip)
4866

    
4867

    
4868
class LURenameInstance(LogicalUnit):
4869
  """Rename an instance.
4870

4871
  """
4872
  HPATH = "instance-rename"
4873
  HTYPE = constants.HTYPE_INSTANCE
4874
  _OP_PARAMS = [
4875
    _PInstanceName,
4876
    ("new_name", _NoDefault, _TNonEmptyString),
4877
    ("ip_check", False, _TBool),
4878
    ("name_check", True, _TBool),
4879
    ]
4880

    
4881
  def CheckArguments(self):
4882
    """Check arguments.
4883

4884
    """
4885
    if self.op.ip_check and not self.op.name_check:
4886
      # TODO: make the ip check more flexible and not depend on the name check
4887
      raise errors.OpPrereqError("Cannot do ip check without a name check",
4888
                                 errors.ECODE_INVAL)
4889

    
4890
  def BuildHooksEnv(self):
4891
    """Build hooks env.
4892

4893
    This runs on master, primary and secondary nodes of the instance.
4894

4895
    """
4896
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4897
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4898
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4899
    return env, nl, nl
4900

    
4901
  def CheckPrereq(self):
4902
    """Check prerequisites.
4903

4904
    This checks that the instance is in the cluster and is not running.
4905

4906
    """
4907
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4908
                                                self.op.instance_name)
4909
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4910
    assert instance is not None
4911
    _CheckNodeOnline(self, instance.primary_node)
4912
    _CheckInstanceDown(self, instance, "cannot rename")
4913
    self.instance = instance
4914

    
4915
    new_name = self.op.new_name
4916
    if self.op.name_check:
4917
      hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4918
      new_name = hostinfo.name
4919
      if (self.op.ip_check and
4920
          netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4921
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4922
                                   (hostinfo.ip, new_name),
4923
                                   errors.ECODE_NOTUNIQUE)
4924

    
4925
    instance_list = self.cfg.GetInstanceList()
4926
    if new_name in instance_list:
4927
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4928
                                 new_name, errors.ECODE_EXISTS)
4929

    
4930

    
4931
  def Exec(self, feedback_fn):
4932
    """Reinstall the instance.
4933

4934
    """
4935
    inst = self.instance
4936
    old_name = inst.name
4937

    
4938
    if inst.disk_template == constants.DT_FILE:
4939
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4940

    
4941
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4942
    # Change the instance lock. This is definitely safe while we hold the BGL
4943
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4944
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4945

    
4946
    # re-read the instance from the configuration after rename
4947
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4948

    
4949
    if inst.disk_template == constants.DT_FILE:
4950
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4951
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4952
                                                     old_file_storage_dir,
4953
                                                     new_file_storage_dir)
4954
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4955
                   " (but the instance has been renamed in Ganeti)" %
4956
                   (inst.primary_node, old_file_storage_dir,
4957
                    new_file_storage_dir))
4958

    
4959
    _StartInstanceDisks(self, inst, None)
4960
    try:
4961
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4962
                                                 old_name, self.op.debug_level)
4963
      msg = result.fail_msg
4964
      if msg:
4965
        msg = ("Could not run OS rename script for instance %s on node %s"
4966
               " (but the instance has been renamed in Ganeti): %s" %
4967
               (inst.name, inst.primary_node, msg))
4968
        self.proc.LogWarning(msg)
4969
    finally:
4970
      _ShutdownInstanceDisks(self, inst)
4971

    
4972
    return inst.name
4973

    
4974

    
4975
class LURemoveInstance(LogicalUnit):
4976
  """Remove an instance.
4977

4978
  """
4979
  HPATH = "instance-remove"
4980
  HTYPE = constants.HTYPE_INSTANCE
4981
  _OP_PARAMS = [
4982
    _PInstanceName,
4983
    ("ignore_failures", False, _TBool),
4984
    _PShutdownTimeout,
4985
    ]
4986
  REQ_BGL = False
4987

    
4988
  def ExpandNames(self):
4989
    self._ExpandAndLockInstance()
4990
    self.needed_locks[locking.LEVEL_NODE] = []
4991
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4992

    
4993
  def DeclareLocks(self, level):
4994
    if level == locking.LEVEL_NODE:
4995
      self._LockInstancesNodes()
4996

    
4997
  def BuildHooksEnv(self):
4998
    """Build hooks env.
4999

5000
    This runs on master, primary and secondary nodes of the instance.
5001

5002
    """
5003
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5004
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5005
    nl = [self.cfg.GetMasterNode()]
5006
    nl_post = list(self.instance.all_nodes) + nl
5007
    return env, nl, nl_post
5008

    
5009
  def CheckPrereq(self):
5010
    """Check prerequisites.
5011

5012
    This checks that the instance is in the cluster.
5013

5014
    """
5015
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5016
    assert self.instance is not None, \
5017
      "Cannot retrieve locked instance %s" % self.op.instance_name
5018

    
5019
  def Exec(self, feedback_fn):
5020
    """Remove the instance.
5021

5022
    """
5023
    instance = self.instance
5024
    logging.info("Shutting down instance %s on node %s",
5025
                 instance.name, instance.primary_node)
5026

    
5027
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5028
                                             self.op.shutdown_timeout)
5029
    msg = result.fail_msg
5030
    if msg:
5031
      if self.op.ignore_failures:
5032
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5033
      else:
5034
        raise errors.OpExecError("Could not shutdown instance %s on"
5035
                                 " node %s: %s" %
5036
                                 (instance.name, instance.primary_node, msg))
5037

    
5038
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5039

    
5040

    
5041
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5042
  """Utility function to remove an instance.
5043

5044
  """
5045
  logging.info("Removing block devices for instance %s", instance.name)
5046

    
5047
  if not _RemoveDisks(lu, instance):
5048
    if not ignore_failures:
5049
      raise errors.OpExecError("Can't remove instance's disks")
5050
    feedback_fn("Warning: can't remove instance's disks")
5051

    
5052
  logging.info("Removing instance %s out of cluster config", instance.name)
5053

    
5054
  lu.cfg.RemoveInstance(instance.name)
5055

    
5056
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5057
    "Instance lock removal conflict"
5058

    
5059
  # Remove lock for the instance
5060
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5061

    
5062

    
5063
class LUQueryInstances(NoHooksLU):
5064
  """Logical unit for querying instances.
5065

5066
  """
5067
  # pylint: disable-msg=W0142
5068
  _OP_PARAMS = [
5069
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5070
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5071
    ("use_locking", False, _TBool),
5072
    ]
5073
  REQ_BGL = False
5074
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5075
                    "serial_no", "ctime", "mtime", "uuid"]
5076
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5077
                                    "admin_state",
5078
                                    "disk_template", "ip", "mac", "bridge",
5079
                                    "nic_mode", "nic_link",
5080
                                    "sda_size", "sdb_size", "vcpus", "tags",
5081
                                    "network_port", "beparams",
5082
                                    r"(disk)\.(size)/([0-9]+)",
5083
                                    r"(disk)\.(sizes)", "disk_usage",
5084
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5085
                                    r"(nic)\.(bridge)/([0-9]+)",
5086
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5087
                                    r"(disk|nic)\.(count)",
5088
                                    "hvparams",
5089
                                    ] + _SIMPLE_FIELDS +
5090
                                  ["hv/%s" % name
5091
                                   for name in constants.HVS_PARAMETERS
5092
                                   if name not in constants.HVC_GLOBALS] +
5093
                                  ["be/%s" % name
5094
                                   for name in constants.BES_PARAMETERS])
5095
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5096
                                   "oper_ram",
5097
                                   "oper_vcpus",
5098
                                   "status")
5099

    
5100

    
5101
  def CheckArguments(self):
5102
    _CheckOutputFields(static=self._FIELDS_STATIC,
5103
                       dynamic=self._FIELDS_DYNAMIC,
5104
                       selected=self.op.output_fields)
5105

    
5106
  def ExpandNames(self):
5107
    self.needed_locks = {}
5108
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5109
    self.share_locks[locking.LEVEL_NODE] = 1
5110

    
5111
    if self.op.names:
5112
      self.wanted = _GetWantedInstances(self, self.op.names)
5113
    else:
5114
      self.wanted = locking.ALL_SET
5115

    
5116
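    # live data is only needed when non-static fields were requested, and
    # locks are only taken in that case and only if the caller asked for them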
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5117
    self.do_locking = self.do_node_query and self.op.use_locking
5118
    if self.do_locking:
5119
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5120
      self.needed_locks[locking.LEVEL_NODE] = []
5121
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5122

    
5123
  def DeclareLocks(self, level):
5124
    if level == locking.LEVEL_NODE and self.do_locking:
5125
      self._LockInstancesNodes()
5126

    
5127
  def Exec(self, feedback_fn):
5128
    """Computes the list of nodes and their attributes.
5129

5130
    """
5131
    # pylint: disable-msg=R0912
5132
    # way too many branches here
5133
    all_info = self.cfg.GetAllInstancesInfo()
5134
    if self.wanted == locking.ALL_SET:
5135
      # caller didn't specify instance names, so ordering is not important
5136
      if self.do_locking:
5137
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5138
      else:
5139
        instance_names = all_info.keys()
5140
      instance_names = utils.NiceSort(instance_names)
5141
    else:
5142
      # caller did specify names, so we must keep the ordering
5143
      if self.do_locking:
5144
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5145
      else:
5146
        tgt_set = all_info.keys()
5147
      missing = set(self.wanted).difference(tgt_set)
5148
      if missing:
5149
        raise errors.OpExecError("Some instances were removed before"
5150
                                 " retrieving their data: %s" % missing)
5151
      instance_names = self.wanted
5152

    
5153
    instance_list = [all_info[iname] for iname in instance_names]
5154

    
5155
    # begin data gathering
5156

    
5157
    nodes = frozenset([inst.primary_node for inst in instance_list])
5158
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5159

    
5160
    bad_nodes = []
5161
    off_nodes = []
5162
    if self.do_node_query:
5163
      live_data = {}
5164
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5165
      for name in nodes:
5166
        result = node_data[name]
5167
        if result.offline:
5168
          # offline nodes will be in both lists
5169
          off_nodes.append(name)
5170
        if result.fail_msg:
5171
          bad_nodes.append(name)
5172
        else:
5173
          if result.payload:
5174
            live_data.update(result.payload)
5175
          # else no instance is alive
5176
    else:
5177
      live_data = dict([(name, {}) for name in instance_names])
5178

    
5179
    # end data gathering
5180

    
5181
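    # resolve every requested output field per instance, either from the
    # configuration data or from the live data gathered above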
    HVPREFIX = "hv/"
5182
    BEPREFIX = "be/"
5183
    output = []
5184
    cluster = self.cfg.GetClusterInfo()
5185
    for instance in instance_list:
5186
      iout = []
5187
      i_hv = cluster.FillHV(instance, skip_globals=True)
5188
      i_be = cluster.FillBE(instance)
5189
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5190
      for field in self.op.output_fields:
5191
        st_match = self._FIELDS_STATIC.Matches(field)
5192
        if field in self._SIMPLE_FIELDS:
5193
          val = getattr(instance, field)
5194
        elif field == "pnode":
5195
          val = instance.primary_node
5196
        elif field == "snodes":
5197
          val = list(instance.secondary_nodes)
5198
        elif field == "admin_state":
5199
          val = instance.admin_up
5200
        elif field == "oper_state":
5201
          if instance.primary_node in bad_nodes:
5202
            val = None
5203
          else:
5204
            val = bool(live_data.get(instance.name))
5205
        elif field == "status":
5206
          if instance.primary_node in off_nodes:
5207
            val = "ERROR_nodeoffline"
5208
          elif instance.primary_node in bad_nodes:
5209
            val = "ERROR_nodedown"
5210
          else:
5211
            running = bool(live_data.get(instance.name))
5212
            if running:
5213
              if instance.admin_up:
5214
                val = "running"
5215
              else:
5216
                val = "ERROR_up"
5217
            else:
5218
              if instance.admin_up:
5219
                val = "ERROR_down"
5220
              else:
5221
                val = "ADMIN_down"
5222
        elif field == "oper_ram":
5223
          if instance.primary_node in bad_nodes:
5224
            val = None
5225
          elif instance.name in live_data:
5226
            val = live_data[instance.name].get("memory", "?")
5227
          else:
5228
            val = "-"
5229
        elif field == "oper_vcpus":
5230
          if instance.primary_node in bad_nodes:
5231
            val = None
5232
          elif instance.name in live_data:
5233
            val = live_data[instance.name].get("vcpus", "?")
5234
          else:
5235
            val = "-"
5236
        elif field == "vcpus":
5237
          val = i_be[constants.BE_VCPUS]
5238
        elif field == "disk_template":
5239
          val = instance.disk_template
5240
        elif field == "ip":
5241
          if instance.nics:
5242
            val = instance.nics[0].ip
5243
          else:
5244
            val = None
5245
        elif field == "nic_mode":
5246
          if instance.nics:
5247
            val = i_nicp[0][constants.NIC_MODE]
5248
          else:
5249
            val = None
5250
        elif field == "nic_link":
5251
          if instance.nics:
5252
            val = i_nicp[0][constants.NIC_LINK]
5253
          else:
5254
            val = None
5255
        elif field == "bridge":
5256
          if (instance.nics and
5257
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5258
            val = i_nicp[0][constants.NIC_LINK]
5259
          else:
5260
            val = None
5261
        elif field == "mac":
5262
          if instance.nics:
5263
            val = instance.nics[0].mac
5264
          else:
5265
            val = None
5266
        elif field == "sda_size" or field == "sdb_size":
5267
          idx = ord(field[2]) - ord('a')
5268
          try:
5269
            val = instance.FindDisk(idx).size
5270
          except errors.OpPrereqError:
5271
            val = None
5272
        elif field == "disk_usage": # total disk usage per node
5273
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5274
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5275
        elif field == "tags":
5276
          val = list(instance.GetTags())
5277
        elif field == "hvparams":
5278
          val = i_hv
5279
        elif (field.startswith(HVPREFIX) and
5280
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5281
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5282
          val = i_hv.get(field[len(HVPREFIX):], None)
5283
        elif field == "beparams":
5284
          val = i_be
5285
        elif (field.startswith(BEPREFIX) and
5286
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5287
          val = i_be.get(field[len(BEPREFIX):], None)
5288
        elif st_match and st_match.groups():
5289
          # matches a variable list
5290
          st_groups = st_match.groups()
5291
          if st_groups and st_groups[0] == "disk":
5292
            if st_groups[1] == "count":
5293
              val = len(instance.disks)
5294
            elif st_groups[1] == "sizes":
5295
              val = [disk.size for disk in instance.disks]
5296
            elif st_groups[1] == "size":
5297
              try:
5298
                val = instance.FindDisk(st_groups[2]).size
5299
              except errors.OpPrereqError:
5300
                val = None
5301
            else:
5302
              assert False, "Unhandled disk parameter"
5303
          elif st_groups[0] == "nic":
5304
            if st_groups[1] == "count":
5305
              val = len(instance.nics)
5306
            elif st_groups[1] == "macs":
5307
              val = [nic.mac for nic in instance.nics]
5308
            elif st_groups[1] == "ips":
5309
              val = [nic.ip for nic in instance.nics]
5310
            elif st_groups[1] == "modes":
5311
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5312
            elif st_groups[1] == "links":
5313
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5314
            elif st_groups[1] == "bridges":
5315
              val = []
5316
              for nicp in i_nicp:
5317
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5318
                  val.append(nicp[constants.NIC_LINK])
5319
                else:
5320
                  val.append(None)
5321
            else:
5322
              # index-based item
5323
              nic_idx = int(st_groups[2])
5324
              if nic_idx >= len(instance.nics):
5325
                val = None
5326
              else:
5327
                if st_groups[1] == "mac":
5328
                  val = instance.nics[nic_idx].mac
5329
                elif st_groups[1] == "ip":
5330
                  val = instance.nics[nic_idx].ip
5331
                elif st_groups[1] == "mode":
5332
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5333
                elif st_groups[1] == "link":
5334
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5335
                elif st_groups[1] == "bridge":
5336
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5337
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5338
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5339
                  else:
5340
                    val = None
5341
                else:
5342
                  assert False, "Unhandled NIC parameter"
5343
          else:
5344
            assert False, ("Declared but unhandled variable parameter '%s'" %
5345
                           field)
5346
        else:
5347
          assert False, "Declared but unhandled parameter '%s'" % field
5348
        iout.append(val)
5349
      output.append(iout)
5350

    
5351
    return output
5352

    
5353

    
5354
class LUFailoverInstance(LogicalUnit):
5355
  """Failover an instance.
5356

5357
  """
5358
  HPATH = "instance-failover"
5359
  HTYPE = constants.HTYPE_INSTANCE
5360
  _OP_PARAMS = [
5361
    _PInstanceName,
5362
    ("ignore_consistency", False, _TBool),
5363
    _PShutdownTimeout,
5364
    ]
5365
  REQ_BGL = False
5366

    
5367
  def ExpandNames(self):
5368
    self._ExpandAndLockInstance()
5369
    self.needed_locks[locking.LEVEL_NODE] = []
5370
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5371

    
5372
  def DeclareLocks(self, level):
5373
    if level == locking.LEVEL_NODE:
5374
      self._LockInstancesNodes()
5375

    
5376
  def BuildHooksEnv(self):
5377
    """Build hooks env.
5378

5379
    This runs on master, primary and secondary nodes of the instance.
5380

5381
    """
5382
    instance = self.instance
5383
    source_node = instance.primary_node
5384
    target_node = instance.secondary_nodes[0]
5385
    env = {
5386
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5387
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5388
      "OLD_PRIMARY": source_node,
5389
      "OLD_SECONDARY": target_node,
5390
      "NEW_PRIMARY": target_node,
5391
      "NEW_SECONDARY": source_node,
5392
      }
5393
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5394
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5395
    nl_post = list(nl)
5396
    nl_post.append(source_node)
5397
    return env, nl, nl_post
5398

    
5399
  def CheckPrereq(self):
5400
    """Check prerequisites.
5401

5402
    This checks that the instance is in the cluster.
5403

5404
    """
5405
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5406
    assert self.instance is not None, \
5407
      "Cannot retrieve locked instance %s" % self.op.instance_name
5408

    
5409
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5410
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5411
      raise errors.OpPrereqError("Instance's disk layout is not"
5412
                                 " network mirrored, cannot failover.",
5413
                                 errors.ECODE_STATE)
5414

    
5415
    secondary_nodes = instance.secondary_nodes
5416
    if not secondary_nodes:
5417
      raise errors.ProgrammerError("no secondary node but using "
5418
                                   "a mirrored disk template")
5419

    
5420
    target_node = secondary_nodes[0]
5421
    _CheckNodeOnline(self, target_node)
5422
    _CheckNodeNotDrained(self, target_node)
5423
    if instance.admin_up:
5424
      # check memory requirements on the secondary node
5425
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5426
                           instance.name, bep[constants.BE_MEMORY],
5427
                           instance.hypervisor)
5428
    else:
5429
      self.LogInfo("Not checking memory on the secondary node as"
5430
                   " instance will not be started")
5431

    
5432
    # check bridge existence
5433
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5434

    
5435
  def Exec(self, feedback_fn):
5436
    """Failover an instance.
5437

5438
    The failover is done by shutting it down on its present node and
5439
    starting it on the secondary.
5440

5441
    """
5442
    instance = self.instance
5443

    
5444
    source_node = instance.primary_node
5445
    target_node = instance.secondary_nodes[0]
5446

    
5447
    if instance.admin_up:
5448
      feedback_fn("* checking disk consistency between source and target")
5449
      for dev in instance.disks:
5450
        # for drbd, these are drbd over lvm
5451
        if not _CheckDiskConsistency(self, dev, target_node, False):
5452
          if not self.op.ignore_consistency:
5453
            raise errors.OpExecError("Disk %s is degraded on target node,"
5454
                                     " aborting failover." % dev.iv_name)
5455
    else:
5456
      feedback_fn("* not checking disk consistency as instance is not running")
5457

    
5458
    feedback_fn("* shutting down instance on source node")
5459
    logging.info("Shutting down instance %s on node %s",
5460
                 instance.name, source_node)
5461

    
5462
    result = self.rpc.call_instance_shutdown(source_node, instance,
5463
                                             self.op.shutdown_timeout)
5464
    msg = result.fail_msg
5465
    if msg:
5466
      if self.op.ignore_consistency:
5467
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5468
                             " Proceeding anyway. Please make sure node"
5469
                             " %s is down. Error details: %s",
5470
                             instance.name, source_node, source_node, msg)
5471
      else:
5472
        raise errors.OpExecError("Could not shutdown instance %s on"
5473
                                 " node %s: %s" %
5474
                                 (instance.name, source_node, msg))
5475

    
5476
    feedback_fn("* deactivating the instance's disks on source node")
5477
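    # errors on the (possibly unreachable) source node are tolerated here;
    # only failures on the secondary abort the failover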
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5478
      raise errors.OpExecError("Can't shut down the instance's disks.")
5479

    
5480
    instance.primary_node = target_node
5481
    # distribute new instance config to the other nodes
5482
    self.cfg.Update(instance, feedback_fn)
5483

    
5484
    # Only start the instance if it's marked as up
5485
    if instance.admin_up:
5486
      feedback_fn("* activating the instance's disks on target node")
5487
      logging.info("Starting instance %s on node %s",
5488
                   instance.name, target_node)
5489

    
5490
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5491
                                           ignore_secondaries=True)
5492
      if not disks_ok:
5493
        _ShutdownInstanceDisks(self, instance)
5494
        raise errors.OpExecError("Can't activate the instance's disks")
5495

    
5496
      feedback_fn("* starting the instance on the target node")
5497
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5498
      msg = result.fail_msg
5499
      if msg:
5500
        _ShutdownInstanceDisks(self, instance)
5501
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5502
                                 (instance.name, target_node, msg))
5503

    
5504

    
5505
class LUMigrateInstance(LogicalUnit):
5506
  """Migrate an instance.
5507

5508
  This is migration without shutting down, compared to the failover,
5509
  which is done with shutdown.
5510

5511
  """
5512
  HPATH = "instance-migrate"
5513
  HTYPE = constants.HTYPE_INSTANCE
5514
  _OP_PARAMS = [
5515
    _PInstanceName,
5516
    _PMigrationMode,
5517
    _PMigrationLive,
5518
    ("cleanup", False, _TBool),
5519
    ]
5520

    
5521
  REQ_BGL = False
5522

    
5523
  def ExpandNames(self):
5524
    self._ExpandAndLockInstance()
5525

    
5526
    self.needed_locks[locking.LEVEL_NODE] = []
5527
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5528

    
5529
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5530
                                       self.op.cleanup)
5531
    self.tasklets = [self._migrater]
5532

    
5533
  def DeclareLocks(self, level):
5534
    if level == locking.LEVEL_NODE:
5535
      self._LockInstancesNodes()
5536

    
5537
  def BuildHooksEnv(self):
5538
    """Build hooks env.
5539

5540
    This runs on master, primary and secondary nodes of the instance.
5541

5542
    """
5543
    instance = self._migrater.instance
5544
    source_node = instance.primary_node
5545
    target_node = instance.secondary_nodes[0]
5546
    env = _BuildInstanceHookEnvByObject(self, instance)
5547
    env["MIGRATE_LIVE"] = self._migrater.live
5548
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5549
    env.update({
5550
        "OLD_PRIMARY": source_node,
5551
        "OLD_SECONDARY": target_node,
5552
        "NEW_PRIMARY": target_node,
5553
        "NEW_SECONDARY": source_node,
5554
        })
5555
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5556
    nl_post = list(nl)
5557
    nl_post.append(source_node)
5558
    return env, nl, nl_post
5559

    
5560

    
5561
class LUMoveInstance(LogicalUnit):
5562
  """Move an instance by data-copying.
5563

5564
  """
5565
  HPATH = "instance-move"
5566
  HTYPE = constants.HTYPE_INSTANCE
5567
  _OP_PARAMS = [
5568
    _PInstanceName,
5569
    ("target_node", _NoDefault, _TNonEmptyString),
5570
    _PShutdownTimeout,
5571
    ]
5572
  REQ_BGL = False
5573

    
5574
  def ExpandNames(self):
5575
    self._ExpandAndLockInstance()
5576
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5577
    self.op.target_node = target_node
5578
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5579
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5580

    
5581
  def DeclareLocks(self, level):
5582
    if level == locking.LEVEL_NODE:
5583
      self._LockInstancesNodes(primary_only=True)
5584

    
5585
  def BuildHooksEnv(self):
5586
    """Build hooks env.
5587

5588
    This runs on master, primary and secondary nodes of the instance.
5589

5590
    """
5591
    env = {
5592
      "TARGET_NODE": self.op.target_node,
5593
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5594
      }
5595
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5596
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5597
                                       self.op.target_node]
5598
    return env, nl, nl
5599

    
5600
  def CheckPrereq(self):
5601
    """Check prerequisites.
5602

5603
    This checks that the instance is in the cluster.
5604

5605
    """
5606
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5607
    assert self.instance is not None, \
5608
      "Cannot retrieve locked instance %s" % self.op.instance_name
5609

    
5610
    node = self.cfg.GetNodeInfo(self.op.target_node)
5611
    assert node is not None, \
5612
      "Cannot retrieve locked node %s" % self.op.target_node
5613

    
5614
    self.target_node = target_node = node.name
5615

    
5616
    if target_node == instance.primary_node:
5617
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5618
                                 (instance.name, target_node),
5619
                                 errors.ECODE_STATE)
5620

    
5621
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5622

    
5623
    for idx, dsk in enumerate(instance.disks):
5624
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5625
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5626
                                   " cannot copy" % idx, errors.ECODE_STATE)
5627

    
5628
    _CheckNodeOnline(self, target_node)
5629
    _CheckNodeNotDrained(self, target_node)
5630

    
5631
    if instance.admin_up:
5632
      # check memory requirements on the secondary node
5633
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5634
                           instance.name, bep[constants.BE_MEMORY],
5635
                           instance.hypervisor)
5636
    else:
5637
      self.LogInfo("Not checking memory on the secondary node as"
5638
                   " instance will not be started")
5639

    
5640
    # check bridge existence
5641
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5642

    
5643
  def Exec(self, feedback_fn):
5644
    """Move an instance.
5645

5646
    The move is done by shutting it down on its present node, copying
5647
    the data over (slow) and starting it on the new node.
5648

5649
    """
5650
    instance = self.instance
5651

    
5652
    source_node = instance.primary_node
5653
    target_node = self.target_node
5654

    
5655
    self.LogInfo("Shutting down instance %s on source node %s",
5656
                 instance.name, source_node)
5657

    
5658
    result = self.rpc.call_instance_shutdown(source_node, instance,
5659
                                             self.op.shutdown_timeout)
5660
    msg = result.fail_msg
5661
    if msg:
5662
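      # "ignore_consistency" is not among this LU's declared parameters, so
      # read it defensively from the opcode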
      if getattr(self.op, "ignore_consistency", False):
5663
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5664
                             " Proceeding anyway. Please make sure node"
5665
                             " %s is down. Error details: %s",
5666
                             instance.name, source_node, source_node, msg)
5667
      else:
5668
        raise errors.OpExecError("Could not shutdown instance %s on"
5669
                                 " node %s: %s" %
5670
                                 (instance.name, source_node, msg))
5671

    
5672
    # create the target disks
5673
    try:
5674
      _CreateDisks(self, instance, target_node=target_node)
5675
    except errors.OpExecError:
5676
      self.LogWarning("Device creation failed, reverting...")
5677
      try:
5678
        _RemoveDisks(self, instance, target_node=target_node)
5679
      finally:
5680
        self.cfg.ReleaseDRBDMinors(instance.name)
5681
        raise
5682

    
5683
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5684

    
5685
    errs = []
5686
    # activate, get path, copy the data over
5687
    for idx, disk in enumerate(instance.disks):
5688
      self.LogInfo("Copying data for disk %d", idx)
5689
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5690
                                               instance.name, True)
5691
      if result.fail_msg:
5692
        self.LogWarning("Can't assemble newly created disk %d: %s",
5693
                        idx, result.fail_msg)
5694
        errs.append(result.fail_msg)
5695
        break
5696
      dev_path = result.payload
5697
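      # stream the disk contents from the source node onto the device just
      # assembled on the target node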
      result = self.rpc.call_blockdev_export(source_node, disk,
5698
                                             target_node, dev_path,
5699
                                             cluster_name)
5700
      if result.fail_msg:
5701
        self.LogWarning("Can't copy data over for disk %d: %s",
5702
                        idx, result.fail_msg)
5703
        errs.append(result.fail_msg)
5704
        break
5705

    
5706
    if errs:
5707
      self.LogWarning("Some disks failed to copy, aborting")
5708
      try:
5709
        _RemoveDisks(self, instance, target_node=target_node)
5710
      finally:
5711
        self.cfg.ReleaseDRBDMinors(instance.name)
5712
        raise errors.OpExecError("Errors during disk copy: %s" %
5713
                                 (",".join(errs),))
5714

    
5715
    instance.primary_node = target_node
5716
    self.cfg.Update(instance, feedback_fn)
5717

    
5718
    self.LogInfo("Removing the disks on the original node")
5719
    _RemoveDisks(self, instance, target_node=source_node)
5720

    
5721
    # Only start the instance if it's marked as up
5722
    if instance.admin_up:
5723
      self.LogInfo("Starting instance %s on node %s",
5724
                   instance.name, target_node)
5725

    
5726
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5727
                                           ignore_secondaries=True)
5728
      if not disks_ok:
5729
        _ShutdownInstanceDisks(self, instance)
5730
        raise errors.OpExecError("Can't activate the instance's disks")
5731

    
5732
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5733
      msg = result.fail_msg
5734
      if msg:
5735
        _ShutdownInstanceDisks(self, instance)
5736
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5737
                                 (instance.name, target_node, msg))
5738

    
5739

    
5740
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all primary instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names
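    # Note: each TLMigrateInstance tasklet created above handles exactly one
    # instance; the third constructor argument (cleanup=False) selects a
    # normal migration rather than the cleanup of a previously failed one.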

    
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters is accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
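    # Summary of the precedence implemented above: an explicit 'live' flag is
    # translated into a mode (live=True -> HT_MIGRATION_LIVE, live=False ->
    # HT_MIGRATION_NONLIVE), an explicit 'mode' is used as given, and when
    # neither is supplied the hypervisor's HV_MIGRATION_MODE default applies.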

    
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()
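    # At this point both nodes hold connected, fully resynced disks in
    # dual-master ("multimaster") DRBD mode (see _GoReconnect above), which is
    # what allows the target node to access the disks while the source node is
    # still running the instance during the live copy.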

    
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
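    # nodes_ip maps each node to its secondary (replication) IP as stored in
    # the node objects; these are the addresses handed to the DRBD RPCs and to
    # the pre-migration accept_instance call above.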

    
    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
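# The recursion above works bottom-up: children are created before their
# parent device, and force_create becomes "sticky" as soon as a device for
# which CreateOnSecondary() is true is encountered, so everything below such
# a device is created on the given node as well.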

    
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a logical volume name for each of the given
  extensions, based on a freshly generated unique id.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
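# Illustrative example (the exact format of the unique id is up to the
# configuration code and is not relied upon here):
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns two names of the
# form "<unique-id>.disk0" and "<unique-id>.disk1"; the DRBD code below
# further appends "_data"/"_meta" to build the actual LV names.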

    
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
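# The resulting object tree is a single LD_DRBD8 device whose two children
# are the data LV (of the requested size) and a fixed 128 MB metadata LV;
# the DRBD logical_id carries both node names, the allocated port, the two
# minors and the shared secret.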

    
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
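# For an instance named "instance1.example.com" (illustrative name) this
# yields "originstname+instance1.example.com"; _CreateBlockDev attaches the
# text to the devices as metadata (an LVM tag for LV-based disks).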

    
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of the disk template and disk sizes
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }
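  # Worked example (illustrative sizes): two DRBD8 disks of 10240 MB each
  # require 2 * (10240 + 128) = 20736 MB of free space in the volume group,
  # while the same disks as DT_PLAIN need 20480 MB and DT_DISKLESS/DT_FILE
  # need no volume group space at all (None).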

    
  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, _TBool),
    ("wait_for_sync", True, _TBool),
    ("ip_check", True, _TBool),
    ("name_check", True, _TBool),
    ("disks", _NoDefault, _TListOf(_TDict)),
    ("nics", _NoDefault, _TListOf(_TDict)),
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ("osparams", _EmptyDict, _TDict),
    ("no_install", None, _TMaybeBool),
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ("source_handshake", None, _TOr(_TList, _TNone)),
    ("source_x509_ca", None, _TMaybeString),
    ("source_instance_name", None, _TMaybeString),
    ("src_node", None, _TMaybeString),
    ("src_path", None, _TMaybeString),
    ("pnode", None, _TMaybeString),
    ("snode", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("hypervisor", None, _TMaybeString),
    ("disk_template", _NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, _TBool),
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.HostInfo.NormalizeName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_oss:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
      self.source_instance_name = netutils.GetHostInfo(norm_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (i.e. override) some instance
    parameters, try to use them from the export information, if it
    declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
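    # Dropping values that are identical to the cluster defaults means the
    # instance keeps following future changes of those defaults instead of
    # pinning the current values; this is only done when the
    # identify_defaults parameter is set (see CheckPrereq below).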

    
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IsValidIP4(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

    
7065
      nicparams = {}
7066
      if nic_mode_req:
7067
        nicparams[constants.NIC_MODE] = nic_mode_req
7068
      if link:
7069
        nicparams[constants.NIC_LINK] = link
7070

    
7071
      check_params = cluster.SimpleFillNIC(nicparams)
7072
      objects.NIC.CheckParameterSyntax(check_params)
7073
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7074

    
7075
    # disk checks/pre-build
7076
    self.disks = []
7077
    for disk in self.op.disks:
7078
      mode = disk.get("mode", constants.DISK_RDWR)
7079
      if mode not in constants.DISK_ACCESS_SET:
7080
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7081
                                   mode, errors.ECODE_INVAL)
7082
      size = disk.get("size", None)
7083
      if size is None:
7084
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7085
      try:
7086
        size = int(size)
7087
      except (TypeError, ValueError):
7088
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7089
                                   errors.ECODE_INVAL)
7090
      new_disk = {"size": size, "mode": mode}
7091
      if "adopt" in disk:
7092
        new_disk["adopt"] = disk["adopt"]
7093
      self.disks.append(new_disk)
7094

    
7095
    if self.op.mode == constants.INSTANCE_IMPORT:
7096

    
7097
      # Check that the new instance doesn't have less disks than the export
7098
      instance_disks = len(self.disks)
7099
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7100
      if instance_disks < export_disks:
7101
        raise errors.OpPrereqError("Not enough disks to import."
7102
                                   " (instance: %d, export: %d)" %
7103
                                   (instance_disks, export_disks),
7104
                                   errors.ECODE_INVAL)
7105

    
7106
      disk_images = []
7107
      for idx in range(export_disks):
7108
        option = 'disk%d_dump' % idx
7109
        if export_info.has_option(constants.INISECT_INS, option):
7110
          # FIXME: are the old os-es, disk sizes, etc. useful?
7111
          export_name = export_info.get(constants.INISECT_INS, option)
7112
          image = utils.PathJoin(self.op.src_path, export_name)
7113
          disk_images.append(image)
7114
        else:
7115
          disk_images.append(False)
7116

    
7117
      self.src_images = disk_images
7118

    
7119
      old_name = export_info.get(constants.INISECT_INS, 'name')
7120
      try:
7121
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7122
      except (TypeError, ValueError), err:
7123
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7124
                                   " an integer: %s" % str(err),
7125
                                   errors.ECODE_STATE)
7126
      if self.op.instance_name == old_name:
7127
        for idx, nic in enumerate(self.nics):
7128
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7129
            nic_mac_ini = 'nic%d_mac' % idx
7130
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7131

    
7132
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7133

    
7134
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7135
    if self.op.ip_check:
7136
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7137
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7138
                                   (self.check_ip, self.op.instance_name),
7139
                                   errors.ECODE_NOTUNIQUE)
7140

    
7141
    #### mac address generation
7142
    # By generating here the mac address both the allocator and the hooks get
7143
    # the real final mac address rather than the 'auto' or 'generate' value.
7144
    # There is a race condition between the generation and the instance object
7145
    # creation, which means that we know the mac is valid now, but we're not
7146
    # sure it will be when we actually add the instance. If things go bad
7147
    # adding the instance will abort because of a duplicate mac, and the
7148
    # creation job will fail.
7149
    for nic in self.nics:
7150
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7151
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7152

    
7153
    #### allocator run
7154

    
7155
    if self.op.iallocator is not None:
7156
      self._RunAllocator()
7157

    
7158
    #### node related checks
7159

    
7160
    # check primary node
7161
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7162
    assert self.pnode is not None, \
7163
      "Cannot retrieve locked node %s" % self.op.pnode
7164
    if pnode.offline:
7165
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7166
                                 pnode.name, errors.ECODE_STATE)
7167
    if pnode.drained:
7168
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7169
                                 pnode.name, errors.ECODE_STATE)
7170

    
7171
    self.secondaries = []
7172

    
7173
    # mirror node verification
7174
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7175
      if self.op.snode == pnode.name:
7176
        raise errors.OpPrereqError("The secondary node cannot be the"
7177
                                   " primary node.", errors.ECODE_INVAL)
7178
      _CheckNodeOnline(self, self.op.snode)
7179
      _CheckNodeNotDrained(self, self.op.snode)
7180
      self.secondaries.append(self.op.snode)
7181

    
7182
    nodenames = [pnode.name] + self.secondaries
7183

    
7184
    req_size = _ComputeDiskSize(self.op.disk_template,
7185
                                self.disks)
7186

    
7187
    # Check lv size requirements, if not adopting
7188
    if req_size is not None and not self.adopt_disks:
7189
      _CheckNodesFreeDisk(self, nodenames, req_size)
7190

    
7191
    if self.adopt_disks: # instead, we must check the adoption data
7192
      all_lvs = set([i["adopt"] for i in self.disks])
7193
      if len(all_lvs) != len(self.disks):
7194
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7195
                                   errors.ECODE_INVAL)
7196
      for lv_name in all_lvs:
7197
        try:
7198
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7199
        except errors.ReservationError:
7200
          raise errors.OpPrereqError("LV named %s used by another instance" %
7201
                                     lv_name, errors.ECODE_NOTUNIQUE)
7202

    
7203
      node_lvs = self.rpc.call_lv_list([pnode.name],
7204
                                       self.cfg.GetVGName())[pnode.name]
7205
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7206
      node_lvs = node_lvs.payload
7207
      delta = all_lvs.difference(node_lvs.keys())
7208
      if delta:
7209
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7210
                                   utils.CommaJoin(delta),
7211
                                   errors.ECODE_INVAL)
7212
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7213
      if online_lvs:
7214
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7215
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7216
                                   errors.ECODE_STATE)
7217
      # update the size of disk based on what is found
7218
      for dsk in self.disks:
7219
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7220

    
7221
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7222

    
7223
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7224
    # check OS parameters (remotely)
7225
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7226

    
7227
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7228

    
7229
    # memory check on primary node
7230
    if self.op.start:
7231
      _CheckNodeFreeMemory(self, self.pnode.name,
7232
                           "creating instance %s" % self.op.instance_name,
7233
                           self.be_full[constants.BE_MEMORY],
7234
                           self.op.hypervisor)
7235

    
7236
    self.dry_run_result = list(nodenames)
7237

    
7238
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

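    # hypervisors listed in constants.HTS_REQ_PORT need a TCP port (typically
    # for a VNC console), reserved here from the cluster-wide pool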
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("early_release", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

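      # the first LV mirrors the actual disk data, the second one holds the
      # DRBD metadata (a fixed 128 MiB)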
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("ignore_consistency", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
8290
  """Computes the node evacuation strategy.
8291

8292
  """
8293
  _OP_PARAMS = [
8294
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8295
    ("remote_node", None, _TMaybeString),
8296
    ("iallocator", None, _TMaybeString),
8297
    ]
8298
  REQ_BGL = False
8299

    
8300
  def CheckArguments(self):
8301
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8302

    
8303
  def ExpandNames(self):
8304
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8305
    self.needed_locks = locks = {}
8306
    if self.op.remote_node is None:
8307
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8308
    else:
8309
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8310
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8311

    
8312
  def Exec(self, feedback_fn):
8313
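    # With an explicit remote node every secondary instance of the evacuated
    # nodes is paired with that node (unless it is already the instance's
    # primary); otherwise the placement is delegated to the iallocator in
    # multi-evacuate mode.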
    if self.op.remote_node is not None:
8314
      instances = []
8315
      for node in self.op.nodes:
8316
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8317
      result = []
8318
      for i in instances:
8319
        if i.primary_node == self.op.remote_node:
8320
          raise errors.OpPrereqError("Node %s is the primary node of"
8321
                                     " instance %s, cannot use it as"
8322
                                     " secondary" %
8323
                                     (self.op.remote_node, i.name),
8324
                                     errors.ECODE_INVAL)
8325
        result.append([i.name, self.op.remote_node])
8326
    else:
8327
      ial = IAllocator(self.cfg, self.rpc,
8328
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8329
                       evac_nodes=self.op.nodes)
8330
      ial.Run(self.op.iallocator, validate=True)
8331
      if not ial.success:
8332
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8333
                                 errors.ECODE_NORES)
8334
      result = ial.result
8335
    return result
8336

    
8337

    
8338
class LUGrowDisk(LogicalUnit):
8339
  """Grow a disk of an instance.
8340

8341
  """
8342
  HPATH = "disk-grow"
8343
  HTYPE = constants.HTYPE_INSTANCE
8344
  _OP_PARAMS = [
8345
    _PInstanceName,
8346
    ("disk", _NoDefault, _TInt),
8347
    ("amount", _NoDefault, _TInt),
8348
    ("wait_for_sync", True, _TBool),
8349
    ]
8350
  REQ_BGL = False
8351

    
8352
  def ExpandNames(self):
8353
    self._ExpandAndLockInstance()
8354
    self.needed_locks[locking.LEVEL_NODE] = []
8355
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8356

    
8357
  def DeclareLocks(self, level):
8358
    if level == locking.LEVEL_NODE:
8359
      self._LockInstancesNodes()
8360

    
8361
  def BuildHooksEnv(self):
8362
    """Build hooks env.
8363

8364
    This runs on the master, the primary and all the secondaries.
8365

8366
    """
8367
    env = {
8368
      "DISK": self.op.disk,
8369
      "AMOUNT": self.op.amount,
8370
      }
8371
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8372
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8373
    return env, nl, nl
8374

    
8375
  def CheckPrereq(self):
8376
    """Check prerequisites.
8377

8378
    This checks that the instance is in the cluster.
8379

8380
    """
8381
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8382
    assert instance is not None, \
8383
      "Cannot retrieve locked instance %s" % self.op.instance_name
8384
    nodenames = list(instance.all_nodes)
8385
    for node in nodenames:
8386
      _CheckNodeOnline(self, node)
8387

    
8388
    self.instance = instance
8389

    
8390
    if instance.disk_template not in constants.DTS_GROWABLE:
8391
      raise errors.OpPrereqError("Instance's disk layout does not support"
8392
                                 " growing.", errors.ECODE_INVAL)
8393

    
8394
    self.disk = instance.FindDisk(self.op.disk)
8395

    
8396
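    # Every node holding this disk needs the additional space, so check the
    # free disk space on all of the instance's nodes (file-based storage is
    # excluded until a corresponding check exists, see the TODO below).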
    if instance.disk_template != constants.DT_FILE:
8397
      # TODO: check the free disk space for file, when that feature will be
8398
      # supported
8399
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8400

    
8401
  def Exec(self, feedback_fn):
8402
    """Execute disk grow.
8403

8404
    """
8405
    instance = self.instance
8406
    disk = self.disk
8407

    
8408
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8409
    if not disks_ok:
8410
      raise errors.OpExecError("Cannot activate block device to grow")
8411

    
8412
    for node in instance.all_nodes:
8413
      self.cfg.SetDiskID(disk, node)
8414
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8415
      result.Raise("Grow request failed to node %s" % node)
8416

    
8417
      # TODO: Rewrite code to work properly
8418
      # DRBD goes into sync mode for a short amount of time after executing the
8419
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8420
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8421
      # time is a work-around.
8422
      time.sleep(5)
8423

    
8424
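    # The physical devices have been resized above; record the new size in
    # the configuration so that later operations see it.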
    disk.RecordGrow(self.op.amount)
8425
    self.cfg.Update(instance, feedback_fn)
8426
    if self.op.wait_for_sync:
8427
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8428
      if disk_abort:
8429
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8430
                             " status.\nPlease check the instance.")
8431
      if not instance.admin_up:
8432
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8433
    elif not instance.admin_up:
8434
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8435
                           " not supposed to be running because no wait for"
8436
                           " sync mode was requested.")
8437

    
8438

    
8439
class LUQueryInstanceData(NoHooksLU):
8440
  """Query runtime instance data.
8441

8442
  """
8443
  _OP_PARAMS = [
8444
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8445
    ("static", False, _TBool),
8446
    ]
8447
  REQ_BGL = False
8448

    
8449
  def ExpandNames(self):
8450
    self.needed_locks = {}
8451
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8452

    
8453
    if self.op.instances:
8454
      self.wanted_names = []
8455
      for name in self.op.instances:
8456
        full_name = _ExpandInstanceName(self.cfg, name)
8457
        self.wanted_names.append(full_name)
8458
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8459
    else:
8460
      self.wanted_names = None
8461
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8462

    
8463
    self.needed_locks[locking.LEVEL_NODE] = []
8464
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8465

    
8466
  def DeclareLocks(self, level):
8467
    if level == locking.LEVEL_NODE:
8468
      self._LockInstancesNodes()
8469

    
8470
  def CheckPrereq(self):
8471
    """Check prerequisites.
8472

8473
    This only checks the optional instance list against the existing names.
8474

8475
    """
8476
    if self.wanted_names is None:
8477
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8478

    
8479
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8480
                             in self.wanted_names]
8481

    
8482
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8483
    """Returns the status of a block device
8484

8485
    """
8486
    if self.op.static or not node:
8487
      return None
8488

    
8489
    self.cfg.SetDiskID(dev, node)
8490

    
8491
    result = self.rpc.call_blockdev_find(node, dev)
8492
    if result.offline:
8493
      return None
8494

    
8495
    result.Raise("Can't compute disk status for %s" % instance_name)
8496

    
8497
    status = result.payload
8498
    if status is None:
8499
      return None
8500

    
8501
    return (status.dev_path, status.major, status.minor,
8502
            status.sync_percent, status.estimated_time,
8503
            status.is_degraded, status.ldisk_status)
8504

    
8505
  def _ComputeDiskStatus(self, instance, snode, dev):
8506
    """Compute block device status.
8507

8508
    """
8509
    if dev.dev_type in constants.LDS_DRBD:
8510
      # we change the snode then (otherwise we use the one passed in)
8511
      if dev.logical_id[0] == instance.primary_node:
8512
        snode = dev.logical_id[1]
8513
      else:
8514
        snode = dev.logical_id[0]
8515

    
8516
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8517
                                              instance.name, dev)
8518
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8519

    
8520
    if dev.children:
8521
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8522
                      for child in dev.children]
8523
    else:
8524
      dev_children = []
8525

    
8526
    data = {
8527
      "iv_name": dev.iv_name,
8528
      "dev_type": dev.dev_type,
8529
      "logical_id": dev.logical_id,
8530
      "physical_id": dev.physical_id,
8531
      "pstatus": dev_pstatus,
8532
      "sstatus": dev_sstatus,
8533
      "children": dev_children,
8534
      "mode": dev.mode,
8535
      "size": dev.size,
8536
      }
8537

    
8538
    return data
8539

    
8540
  def Exec(self, feedback_fn):
8541
    """Gather and return data"""
8542
    result = {}
8543

    
8544
    cluster = self.cfg.GetClusterInfo()
8545

    
8546
    for instance in self.wanted_instances:
8547
      if not self.op.static:
8548
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8549
                                                  instance.name,
8550
                                                  instance.hypervisor)
8551
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8552
        remote_info = remote_info.payload
8553
        if remote_info and "state" in remote_info:
8554
          remote_state = "up"
8555
        else:
8556
          remote_state = "down"
8557
      else:
8558
        remote_state = None
8559
      if instance.admin_up:
8560
        config_state = "up"
8561
      else:
8562
        config_state = "down"
8563

    
8564
      disks = [self._ComputeDiskStatus(instance, None, device)
8565
               for device in instance.disks]
8566

    
8567
      idict = {
8568
        "name": instance.name,
8569
        "config_state": config_state,
8570
        "run_state": remote_state,
8571
        "pnode": instance.primary_node,
8572
        "snodes": instance.secondary_nodes,
8573
        "os": instance.os,
8574
        # this happens to be the same format used for hooks
8575
        "nics": _NICListToTuple(self, instance.nics),
8576
        "disk_template": instance.disk_template,
8577
        "disks": disks,
8578
        "hypervisor": instance.hypervisor,
8579
        "network_port": instance.network_port,
8580
        "hv_instance": instance.hvparams,
8581
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8582
        "be_instance": instance.beparams,
8583
        "be_actual": cluster.FillBE(instance),
8584
        "os_instance": instance.osparams,
8585
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8586
        "serial_no": instance.serial_no,
8587
        "mtime": instance.mtime,
8588
        "ctime": instance.ctime,
8589
        "uuid": instance.uuid,
8590
        }
8591

    
8592
      result[instance.name] = idict
8593

    
8594
    return result
8595

    
8596

    
8597
class LUSetInstanceParams(LogicalUnit):
8598
  """Modifies an instances's parameters.
8599

8600
  """
8601
  HPATH = "instance-modify"
8602
  HTYPE = constants.HTYPE_INSTANCE
8603
  _OP_PARAMS = [
8604
    _PInstanceName,
8605
    ("nics", _EmptyList, _TList),
8606
    ("disks", _EmptyList, _TList),
8607
    ("beparams", _EmptyDict, _TDict),
8608
    ("hvparams", _EmptyDict, _TDict),
8609
    ("disk_template", None, _TMaybeString),
8610
    ("remote_node", None, _TMaybeString),
8611
    ("os_name", None, _TMaybeString),
8612
    ("force_variant", False, _TBool),
8613
    ("osparams", None, _TOr(_TDict, _TNone)),
8614
    _PForce,
8615
    ]
8616
  REQ_BGL = False
8617

    
8618
  def CheckArguments(self):
8619
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8620
            self.op.hvparams or self.op.beparams or self.op.os_name):
8621
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8622

    
8623
    if self.op.hvparams:
8624
      _CheckGlobalHvParams(self.op.hvparams)
8625

    
8626
    # Disk validation
8627
    disk_addremove = 0
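    # Each entry is either (DDM_ADD, <dict>), (DDM_REMOVE, <dict>) or
    # (<index>, <dict>) for modifying an existing disk; only a single
    # add/remove operation is accepted per call (the NIC list below uses the
    # same convention).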
8628
    for disk_op, disk_dict in self.op.disks:
8629
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8630
      if disk_op == constants.DDM_REMOVE:
8631
        disk_addremove += 1
8632
        continue
8633
      elif disk_op == constants.DDM_ADD:
8634
        disk_addremove += 1
8635
      else:
8636
        if not isinstance(disk_op, int):
8637
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8638
        if not isinstance(disk_dict, dict):
8639
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8640
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8641

    
8642
      if disk_op == constants.DDM_ADD:
8643
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8644
        if mode not in constants.DISK_ACCESS_SET:
8645
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8646
                                     errors.ECODE_INVAL)
8647
        size = disk_dict.get('size', None)
8648
        if size is None:
8649
          raise errors.OpPrereqError("Required disk parameter size missing",
8650
                                     errors.ECODE_INVAL)
8651
        try:
8652
          size = int(size)
8653
        except (TypeError, ValueError), err:
8654
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8655
                                     str(err), errors.ECODE_INVAL)
8656
        disk_dict['size'] = size
8657
      else:
8658
        # modification of disk
8659
        if 'size' in disk_dict:
8660
          raise errors.OpPrereqError("Disk size change not possible, use"
8661
                                     " grow-disk", errors.ECODE_INVAL)
8662

    
8663
    if disk_addremove > 1:
8664
      raise errors.OpPrereqError("Only one disk add or remove operation"
8665
                                 " supported at a time", errors.ECODE_INVAL)
8666

    
8667
    if self.op.disks and self.op.disk_template is not None:
8668
      raise errors.OpPrereqError("Disk template conversion and other disk"
8669
                                 " changes not supported at the same time",
8670
                                 errors.ECODE_INVAL)
8671

    
8672
    if self.op.disk_template:
8673
      _CheckDiskTemplate(self.op.disk_template)
8674
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8675
          self.op.remote_node is None):
8676
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8677
                                   " one requires specifying a secondary node",
8678
                                   errors.ECODE_INVAL)
8679

    
8680
    # NIC validation
8681
    nic_addremove = 0
8682
    for nic_op, nic_dict in self.op.nics:
8683
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8684
      if nic_op == constants.DDM_REMOVE:
8685
        nic_addremove += 1
8686
        continue
8687
      elif nic_op == constants.DDM_ADD:
8688
        nic_addremove += 1
8689
      else:
8690
        if not isinstance(nic_op, int):
8691
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8692
        if not isinstance(nic_dict, dict):
8693
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8694
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8695

    
8696
      # nic_dict should be a dict
8697
      nic_ip = nic_dict.get('ip', None)
8698
      if nic_ip is not None:
8699
        if nic_ip.lower() == constants.VALUE_NONE:
8700
          nic_dict['ip'] = None
8701
        else:
8702
          if not netutils.IsValidIP4(nic_ip):
8703
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8704
                                       errors.ECODE_INVAL)
8705

    
8706
      nic_bridge = nic_dict.get('bridge', None)
8707
      nic_link = nic_dict.get('link', None)
8708
      if nic_bridge and nic_link:
8709
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8710
                                   " at the same time", errors.ECODE_INVAL)
8711
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8712
        nic_dict['bridge'] = None
8713
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8714
        nic_dict['link'] = None
8715

    
8716
      if nic_op == constants.DDM_ADD:
8717
        nic_mac = nic_dict.get('mac', None)
8718
        if nic_mac is None:
8719
          nic_dict['mac'] = constants.VALUE_AUTO
8720

    
8721
      if 'mac' in nic_dict:
8722
        nic_mac = nic_dict['mac']
8723
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8724
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8725

    
8726
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8727
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8728
                                     " modifying an existing nic",
8729
                                     errors.ECODE_INVAL)
8730

    
8731
    if nic_addremove > 1:
8732
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8733
                                 " supported at a time", errors.ECODE_INVAL)
8734

    
8735
  def ExpandNames(self):
8736
    self._ExpandAndLockInstance()
8737
    self.needed_locks[locking.LEVEL_NODE] = []
8738
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8739

    
8740
  def DeclareLocks(self, level):
8741
    if level == locking.LEVEL_NODE:
8742
      self._LockInstancesNodes()
8743
      if self.op.disk_template and self.op.remote_node:
8744
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8745
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8746

    
8747
  def BuildHooksEnv(self):
8748
    """Build hooks env.
8749

8750
    This runs on the master, primary and secondaries.
8751

8752
    """
8753
    args = dict()
8754
    if constants.BE_MEMORY in self.be_new:
8755
      args['memory'] = self.be_new[constants.BE_MEMORY]
8756
    if constants.BE_VCPUS in self.be_new:
8757
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8758
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8759
    # information at all.
8760
    if self.op.nics:
8761
      args['nics'] = []
8762
      nic_override = dict(self.op.nics)
8763
      for idx, nic in enumerate(self.instance.nics):
8764
        if idx in nic_override:
8765
          this_nic_override = nic_override[idx]
8766
        else:
8767
          this_nic_override = {}
8768
        if 'ip' in this_nic_override:
8769
          ip = this_nic_override['ip']
8770
        else:
8771
          ip = nic.ip
8772
        if 'mac' in this_nic_override:
8773
          mac = this_nic_override['mac']
8774
        else:
8775
          mac = nic.mac
8776
        if idx in self.nic_pnew:
8777
          nicparams = self.nic_pnew[idx]
8778
        else:
8779
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8780
        mode = nicparams[constants.NIC_MODE]
8781
        link = nicparams[constants.NIC_LINK]
8782
        args['nics'].append((ip, mac, mode, link))
8783
      if constants.DDM_ADD in nic_override:
8784
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8785
        mac = nic_override[constants.DDM_ADD]['mac']
8786
        nicparams = self.nic_pnew[constants.DDM_ADD]
8787
        mode = nicparams[constants.NIC_MODE]
8788
        link = nicparams[constants.NIC_LINK]
8789
        args['nics'].append((ip, mac, mode, link))
8790
      elif constants.DDM_REMOVE in nic_override:
8791
        del args['nics'][-1]
8792

    
8793
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8794
    if self.op.disk_template:
8795
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8796
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8797
    return env, nl, nl
8798

    
8799
  def CheckPrereq(self):
8800
    """Check prerequisites.
8801

8802
    This only checks the instance list against the existing names.
8803

8804
    """
8805
    # checking the new params on the primary/secondary nodes
8806

    
8807
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8808
    cluster = self.cluster = self.cfg.GetClusterInfo()
8809
    assert self.instance is not None, \
8810
      "Cannot retrieve locked instance %s" % self.op.instance_name
8811
    pnode = instance.primary_node
8812
    nodelist = list(instance.all_nodes)
8813

    
8814
    # OS change
8815
    if self.op.os_name and not self.op.force:
8816
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8817
                      self.op.force_variant)
8818
      instance_os = self.op.os_name
8819
    else:
8820
      instance_os = instance.os
8821

    
8822
    if self.op.disk_template:
8823
      if instance.disk_template == self.op.disk_template:
8824
        raise errors.OpPrereqError("Instance already has disk template %s" %
8825
                                   instance.disk_template, errors.ECODE_INVAL)
8826

    
8827
      if (instance.disk_template,
8828
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8829
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8830
                                   " %s to %s" % (instance.disk_template,
8831
                                                  self.op.disk_template),
8832
                                   errors.ECODE_INVAL)
8833
      _CheckInstanceDown(self, instance, "cannot change disk template")
8834
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8835
        if self.op.remote_node == pnode:
8836
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8837
                                     " as the primary node of the instance" %
8838
                                     self.op.remote_node, errors.ECODE_STATE)
8839
        _CheckNodeOnline(self, self.op.remote_node)
8840
        _CheckNodeNotDrained(self, self.op.remote_node)
8841
        disks = [{"size": d.size} for d in instance.disks]
8842
        required = _ComputeDiskSize(self.op.disk_template, disks)
8843
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8844

    
8845
    # hvparams processing
8846
    if self.op.hvparams:
8847
      hv_type = instance.hypervisor
8848
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8849
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8850
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8851

    
8852
      # local check
8853
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8854
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8855
      self.hv_new = hv_new # the new actual values
8856
      self.hv_inst = i_hvdict # the new dict (without defaults)
8857
    else:
8858
      self.hv_new = self.hv_inst = {}
8859

    
8860
    # beparams processing
8861
    if self.op.beparams:
8862
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8863
                                   use_none=True)
8864
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8865
      be_new = cluster.SimpleFillBE(i_bedict)
8866
      self.be_new = be_new # the new actual values
8867
      self.be_inst = i_bedict # the new dict (without defaults)
8868
    else:
8869
      self.be_new = self.be_inst = {}
8870

    
8871
    # osparams processing
8872
    if self.op.osparams:
8873
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8874
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8875
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8876
      self.os_inst = i_osdict # the new dict (without defaults)
8877
    else:
8878
      self.os_new = self.os_inst = {}
8879

    
8880
    self.warn = []
8881

    
8882
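    # When the memory size is changed and the force flag was not given,
    # verify that the primary node can still host the instance and, if
    # auto_balance is enabled, warn if a secondary could not absorb it on
    # failover.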
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8883
      mem_check_list = [pnode]
8884
      if be_new[constants.BE_AUTO_BALANCE]:
8885
        # either we changed auto_balance to yes or it was from before
8886
        mem_check_list.extend(instance.secondary_nodes)
8887
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8888
                                                  instance.hypervisor)
8889
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8890
                                         instance.hypervisor)
8891
      pninfo = nodeinfo[pnode]
8892
      msg = pninfo.fail_msg
8893
      if msg:
8894
        # Assume the primary node is unreachable and go ahead
8895
        self.warn.append("Can't get info from primary node %s: %s" %
8896
                         (pnode,  msg))
8897
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8898
        self.warn.append("Node data from primary node %s doesn't contain"
8899
                         " free memory information" % pnode)
8900
      elif instance_info.fail_msg:
8901
        self.warn.append("Can't get instance runtime information: %s" %
8902
                        instance_info.fail_msg)
8903
      else:
8904
        if instance_info.payload:
8905
          current_mem = int(instance_info.payload['memory'])
8906
        else:
8907
          # Assume instance not running
8908
          # (there is a slight race condition here, but it's not very probable,
8909
          # and we have no other way to check)
8910
          current_mem = 0
8911
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8912
                    pninfo.payload['memory_free'])
8913
        if miss_mem > 0:
8914
          raise errors.OpPrereqError("This change will prevent the instance"
8915
                                     " from starting, due to %d MB of memory"
8916
                                     " missing on its primary node" % miss_mem,
8917
                                     errors.ECODE_NORES)
8918

    
8919
      if be_new[constants.BE_AUTO_BALANCE]:
8920
        for node, nres in nodeinfo.items():
8921
          if node not in instance.secondary_nodes:
8922
            continue
8923
          msg = nres.fail_msg
8924
          if msg:
8925
            self.warn.append("Can't get info from secondary node %s: %s" %
8926
                             (node, msg))
8927
          elif not isinstance(nres.payload.get('memory_free', None), int):
8928
            self.warn.append("Secondary node %s didn't return free"
8929
                             " memory information" % node)
8930
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8931
            self.warn.append("Not enough memory to failover instance to"
8932
                             " secondary node %s" % node)
8933

    
8934
    # NIC processing
8935
    self.nic_pnew = {}
8936
    self.nic_pinst = {}
8937
    for nic_op, nic_dict in self.op.nics:
8938
      if nic_op == constants.DDM_REMOVE:
8939
        if not instance.nics:
8940
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8941
                                     errors.ECODE_INVAL)
8942
        continue
8943
      if nic_op != constants.DDM_ADD:
8944
        # an existing nic
8945
        if not instance.nics:
8946
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8947
                                     " no NICs" % nic_op,
8948
                                     errors.ECODE_INVAL)
8949
        if nic_op < 0 or nic_op >= len(instance.nics):
8950
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8951
                                     " are 0 to %d" %
8952
                                     (nic_op, len(instance.nics) - 1),
8953
                                     errors.ECODE_INVAL)
8954
        old_nic_params = instance.nics[nic_op].nicparams
8955
        old_nic_ip = instance.nics[nic_op].ip
8956
      else:
8957
        old_nic_params = {}
8958
        old_nic_ip = None
8959

    
8960
      update_params_dict = dict([(key, nic_dict[key])
8961
                                 for key in constants.NICS_PARAMETERS
8962
                                 if key in nic_dict])
8963

    
8964
      if 'bridge' in nic_dict:
8965
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8966

    
8967
      new_nic_params = _GetUpdatedParams(old_nic_params,
8968
                                         update_params_dict)
8969
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8970
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8971
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8972
      self.nic_pinst[nic_op] = new_nic_params
8973
      self.nic_pnew[nic_op] = new_filled_nic_params
8974
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8975

    
8976
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8977
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8978
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8979
        if msg:
8980
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8981
          if self.op.force:
8982
            self.warn.append(msg)
8983
          else:
8984
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8985
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8986
        if 'ip' in nic_dict:
8987
          nic_ip = nic_dict['ip']
8988
        else:
8989
          nic_ip = old_nic_ip
8990
        if nic_ip is None:
8991
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8992
                                     ' on a routed nic', errors.ECODE_INVAL)
8993
      if 'mac' in nic_dict:
8994
        nic_mac = nic_dict['mac']
8995
        if nic_mac is None:
8996
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8997
                                     errors.ECODE_INVAL)
8998
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8999
          # otherwise generate the mac
9000
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9001
        else:
9002
          # or validate/reserve the current one
9003
          try:
9004
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9005
          except errors.ReservationError:
9006
            raise errors.OpPrereqError("MAC address %s already in use"
9007
                                       " in cluster" % nic_mac,
9008
                                       errors.ECODE_NOTUNIQUE)
9009

    
9010
    # DISK processing
9011
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9012
      raise errors.OpPrereqError("Disk operations not supported for"
9013
                                 " diskless instances",
9014
                                 errors.ECODE_INVAL)
9015
    for disk_op, _ in self.op.disks:
9016
      if disk_op == constants.DDM_REMOVE:
9017
        if len(instance.disks) == 1:
9018
          raise errors.OpPrereqError("Cannot remove the last disk of"
9019
                                     " an instance", errors.ECODE_INVAL)
9020
        _CheckInstanceDown(self, instance, "cannot remove disks")
9021

    
9022
      if (disk_op == constants.DDM_ADD and
9023
          len(instance.disks) >= constants.MAX_DISKS):
9024
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9025
                                   " add more" % constants.MAX_DISKS,
9026
                                   errors.ECODE_STATE)
9027
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9028
        # an existing disk
9029
        if disk_op < 0 or disk_op >= len(instance.disks):
9030
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9031
                                     " are 0 to %d" %
9032
                                     (disk_op, len(instance.disks) - 1),
9033
                                     errors.ECODE_INVAL)
9034

    
9035
    return
9036

    
9037
  def _ConvertPlainToDrbd(self, feedback_fn):
9038
    """Converts an instance from plain to drbd.
9039

9040
    """
9041
    feedback_fn("Converting template to drbd")
9042
    instance = self.instance
9043
    pnode = instance.primary_node
9044
    snode = self.op.remote_node
9045

    
9046
    # create a fake disk info for _GenerateDiskTemplate
9047
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9048
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9049
                                      instance.name, pnode, [snode],
9050
                                      disk_info, None, None, 0)
9051
    info = _GetInstanceInfoText(instance)
9052
    feedback_fn("Creating aditional volumes...")
9053
    # first, create the missing data and meta devices
9054
    for disk in new_disks:
9055
      # unfortunately this is... not too nice
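      # on the primary only the new DRBD metadata LV (children[1]) has to be
      # created (the data LV already exists), while the secondary needs both
      # the data and the metadata LV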
9056
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9057
                            info, True)
9058
      for child in disk.children:
9059
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9060
    # at this stage, all new LVs have been created, we can rename the
9061
    # old ones
9062
    feedback_fn("Renaming original volumes...")
9063
    rename_list = [(o, n.children[0].logical_id)
9064
                   for (o, n) in zip(instance.disks, new_disks)]
9065
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9066
    result.Raise("Failed to rename original LVs")
9067

    
9068
    feedback_fn("Initializing DRBD devices...")
9069
    # all child devices are in place, we can now create the DRBD devices
9070
    for disk in new_disks:
9071
      for node in [pnode, snode]:
9072
        f_create = node == pnode
9073
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9074

    
9075
    # at this point, the instance has been modified
9076
    instance.disk_template = constants.DT_DRBD8
9077
    instance.disks = new_disks
9078
    self.cfg.Update(instance, feedback_fn)
9079

    
9080
    # disks are created, waiting for sync
9081
    disk_abort = not _WaitForSync(self, instance)
9082
    if disk_abort:
9083
      raise errors.OpExecError("There are some degraded disks for"
9084
                               " this instance, please cleanup manually")
9085

    
9086
  def _ConvertDrbdToPlain(self, feedback_fn):
9087
    """Converts an instance from drbd to plain.
9088

9089
    """
9090
    instance = self.instance
9091
    assert len(instance.secondary_nodes) == 1
9092
    pnode = instance.primary_node
9093
    snode = instance.secondary_nodes[0]
9094
    feedback_fn("Converting template to plain")
9095

    
9096
    old_disks = instance.disks
9097
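    # each DRBD8 disk has two children: the data LV (index 0), which becomes
    # the plain disk, and the metadata LV (index 1), which is removed further
    # down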
    new_disks = [d.children[0] for d in old_disks]
9098

    
9099
    # copy over size and mode
9100
    for parent, child in zip(old_disks, new_disks):
9101
      child.size = parent.size
9102
      child.mode = parent.mode
9103

    
9104
    # update instance structure
9105
    instance.disks = new_disks
9106
    instance.disk_template = constants.DT_PLAIN
9107
    self.cfg.Update(instance, feedback_fn)
9108

    
9109
    feedback_fn("Removing volumes on the secondary node...")
9110
    for disk in old_disks:
9111
      self.cfg.SetDiskID(disk, snode)
9112
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9113
      if msg:
9114
        self.LogWarning("Could not remove block device %s on node %s,"
9115
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9116

    
9117
    feedback_fn("Removing unneeded volumes on the primary node...")
9118
    for idx, disk in enumerate(old_disks):
9119
      meta = disk.children[1]
9120
      self.cfg.SetDiskID(meta, pnode)
9121
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9122
      if msg:
9123
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9124
                        " continuing anyway: %s", idx, pnode, msg)
9125

    
9126

    
9127
  def Exec(self, feedback_fn):
9128
    """Modifies an instance.
9129

9130
    All parameters take effect only at the next restart of the instance.
9131

9132
    """
9133
    # Process here the warnings from CheckPrereq, as we don't have a
9134
    # feedback_fn there.
9135
    for warn in self.warn:
9136
      feedback_fn("WARNING: %s" % warn)
9137

    
9138
    result = []
9139
    instance = self.instance
9140
    # disk changes
9141
    for disk_op, disk_dict in self.op.disks:
9142
      if disk_op == constants.DDM_REMOVE:
9143
        # remove the last disk
9144
        device = instance.disks.pop()
9145
        device_idx = len(instance.disks)
9146
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9147
          self.cfg.SetDiskID(disk, node)
9148
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9149
          if msg:
9150
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9151
                            " continuing anyway", device_idx, node, msg)
9152
        result.append(("disk/%d" % device_idx, "remove"))
9153
      elif disk_op == constants.DDM_ADD:
9154
        # add a new disk
9155
        if instance.disk_template == constants.DT_FILE:
9156
          file_driver, file_path = instance.disks[0].logical_id
9157
          file_path = os.path.dirname(file_path)
9158
        else:
9159
          file_driver = file_path = None
9160
        disk_idx_base = len(instance.disks)
9161
        new_disk = _GenerateDiskTemplate(self,
9162
                                         instance.disk_template,
9163
                                         instance.name, instance.primary_node,
9164
                                         instance.secondary_nodes,
9165
                                         [disk_dict],
9166
                                         file_path,
9167
                                         file_driver,
9168
                                         disk_idx_base)[0]
9169
        instance.disks.append(new_disk)
9170
        info = _GetInstanceInfoText(instance)
9171

    
9172
        logging.info("Creating volume %s for instance %s",
9173
                     new_disk.iv_name, instance.name)
9174
        # Note: this needs to be kept in sync with _CreateDisks
9175
        #HARDCODE
9176
        for node in instance.all_nodes:
9177
          f_create = node == instance.primary_node
9178
          try:
9179
            _CreateBlockDev(self, node, instance, new_disk,
9180
                            f_create, info, f_create)
9181
          except errors.OpExecError, err:
9182
            self.LogWarning("Failed to create volume %s (%s) on"
9183
                            " node %s: %s",
9184
                            new_disk.iv_name, new_disk, node, err)
9185
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9186
                       (new_disk.size, new_disk.mode)))
9187
      else:
9188
        # change a given disk
9189
        instance.disks[disk_op].mode = disk_dict['mode']
9190
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9191

    
9192
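    # disk template conversion: the conversion helpers update instance.disks
    # and instance.disk_template themselves; if they fail, roll back by
    # releasing any DRBD minors reserved for the instance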
    if self.op.disk_template:
9193
      r_shut = _ShutdownInstanceDisks(self, instance)
9194
      if not r_shut:
9195
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9196
                                 " proceed with disk template conversion")
9197
      mode = (instance.disk_template, self.op.disk_template)
9198
      try:
9199
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9200
      except:
9201
        self.cfg.ReleaseDRBDMinors(instance.name)
9202
        raise
9203
      result.append(("disk_template", self.op.disk_template))
9204

    
9205
    # NIC changes
9206
    for nic_op, nic_dict in self.op.nics:
9207
      if nic_op == constants.DDM_REMOVE:
9208
        # remove the last nic
9209
        del instance.nics[-1]
9210
        result.append(("nic.%d" % len(instance.nics), "remove"))
9211
      elif nic_op == constants.DDM_ADD:
9212
        # mac and bridge should be set, by now
9213
        mac = nic_dict['mac']
9214
        ip = nic_dict.get('ip', None)
9215
        nicparams = self.nic_pinst[constants.DDM_ADD]
9216
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9217
        instance.nics.append(new_nic)
9218
        result.append(("nic.%d" % (len(instance.nics) - 1),
9219
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9220
                       (new_nic.mac, new_nic.ip,
9221
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9222
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9223
                       )))
9224
      else:
9225
        for key in 'mac', 'ip':
9226
          if key in nic_dict:
9227
            setattr(instance.nics[nic_op], key, nic_dict[key])
9228
        if nic_op in self.nic_pinst:
9229
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9230
        for key, val in nic_dict.iteritems():
9231
          result.append(("nic.%s/%d" % (key, nic_op), val))
9232

    
9233
    # hvparams changes
9234
    if self.op.hvparams:
9235
      instance.hvparams = self.hv_inst
9236
      for key, val in self.op.hvparams.iteritems():
9237
        result.append(("hv/%s" % key, val))
9238

    
9239
    # beparams changes
9240
    if self.op.beparams:
9241
      instance.beparams = self.be_inst
9242
      for key, val in self.op.beparams.iteritems():
9243
        result.append(("be/%s" % key, val))
9244

    
9245
    # OS change
9246
    if self.op.os_name:
9247
      instance.os = self.op.os_name
9248

    
9249
    # osparams changes
9250
    if self.op.osparams:
9251
      instance.osparams = self.os_inst
9252
      for key, val in self.op.osparams.iteritems():
9253
        result.append(("os/%s" % key, val))
9254

    
9255
    self.cfg.Update(instance, feedback_fn)
9256

    
9257
    return result
9258

    
9259
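  # maps (current template, target template) to the conversion routine;
  # CheckPrereq rejects pairs that are not listed here and Exec invokes the
  # routine with (self, feedback_fn)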
  _DISK_CONVERSIONS = {
9260
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9261
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9262
    }
9263

    
9264

    
9265
class LUQueryExports(NoHooksLU):
9266
  """Query the exports list
9267

9268
  """
9269
  _OP_PARAMS = [
9270
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9271
    ("use_locking", False, _TBool),
9272
    ]
9273
  REQ_BGL = False
9274

    
9275
  def ExpandNames(self):
9276
    self.needed_locks = {}
9277
    self.share_locks[locking.LEVEL_NODE] = 1
9278
    if not self.op.nodes:
9279
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9280
    else:
9281
      self.needed_locks[locking.LEVEL_NODE] = \
9282
        _GetWantedNodes(self, self.op.nodes)
9283

    
9284
  def Exec(self, feedback_fn):
9285
    """Compute the list of all the exported system images.
9286

9287
    @rtype: dict
9288
    @return: a dictionary with the structure node->(export-list)
9289
        where export-list is a list of the instances exported on
9290
        that node.
9291

9292
    """
9293
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9294
    rpcresult = self.rpc.call_export_list(self.nodes)
9295
    result = {}
9296
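    # nodes whose RPC failed are reported as False instead of an export list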
    for node in rpcresult:
9297
      if rpcresult[node].fail_msg:
9298
        result[node] = False
9299
      else:
9300
        result[node] = rpcresult[node].payload
9301

    
9302
    return result
9303

    
9304

    
9305
class LUPrepareExport(NoHooksLU):
9306
  """Prepares an instance for an export and returns useful information.
9307

9308
  """
9309
  _OP_PARAMS = [
9310
    _PInstanceName,
9311
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9312
    ]
9313
  REQ_BGL = False
9314

    
9315
  def ExpandNames(self):
9316
    self._ExpandAndLockInstance()
9317

    
9318
  def CheckPrereq(self):
9319
    """Check prerequisites.
9320

9321
    """
9322
    instance_name = self.op.instance_name
9323

    
9324
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9325
    assert self.instance is not None, \
9326
          "Cannot retrieve locked instance %s" % self.op.instance_name
9327
    _CheckNodeOnline(self, self.instance.primary_node)
9328

    
9329
    self._cds = _GetClusterDomainSecret()
9330

    
9331
  def Exec(self, feedback_fn):
9332
    """Prepares an instance for an export.
9333

9334
    """
9335
    instance = self.instance
9336

    
9337
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9338
      salt = utils.GenerateSecret(8)
9339

    
9340
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9341
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9342
                                              constants.RIE_CERT_VALIDITY)
9343
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9344

    
9345
      (name, cert_pem) = result.payload
9346

    
9347
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9348
                                             cert_pem)
9349

    
9350
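      # The HMAC on the key name and the signature on the CA are computed
      # with the cluster domain secret, allowing LUExportInstance to verify
      # later that this data was issued by this cluster.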
      return {
9351
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9352
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9353
                          salt),
9354
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9355
        }
9356

    
9357
    return None
9358

    
9359

    
9360
class LUExportInstance(LogicalUnit):
9361
  """Export an instance to an image in the cluster.
9362

9363
  """
9364
  HPATH = "instance-export"
9365
  HTYPE = constants.HTYPE_INSTANCE
9366
  _OP_PARAMS = [
9367
    _PInstanceName,
9368
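    # for local exports this is a single node name, for remote exports it is
    # a list with one destination entry per instance disk (verified in
    # CheckPrereq)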
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9369
    ("shutdown", True, _TBool),
9370
    _PShutdownTimeout,
9371
    ("remove_instance", False, _TBool),
9372
    ("ignore_remove_failures", False, _TBool),
9373
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9374
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9375
    ("destination_x509_ca", None, _TMaybeString),
9376
    ]
9377
  REQ_BGL = False
9378

    
9379
  def CheckArguments(self):
9380
    """Check the arguments.
9381

9382
    """
9383
    self.x509_key_name = self.op.x509_key_name
9384
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9385

    
9386
    if self.op.remove_instance and not self.op.shutdown:
9387
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9388
                                 " down before")
9389

    
9390
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9391
      if not self.x509_key_name:
9392
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9393
                                   errors.ECODE_INVAL)
9394

    
9395
      if not self.dest_x509_ca_pem:
9396
        raise errors.OpPrereqError("Missing destination X509 CA",
9397
                                   errors.ECODE_INVAL)
9398

    
9399
  def ExpandNames(self):
9400
    self._ExpandAndLockInstance()
9401

    
9402
    # Lock all nodes for local exports
9403
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9404
      # FIXME: lock only instance primary and destination node
9405
      #
9406
      # Sad but true, for now we have to lock all nodes, as we don't know where
9407
      # the previous export might be, and in this LU we search for it and
9408
      # remove it from its current node. In the future we could fix this by:
9409
      #  - making a tasklet to search (share-lock all), then create the
9410
      #    new one, then one to remove, after
9411
      #  - removing the removal operation altogether
9412
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9413

    
9414
  def DeclareLocks(self, level):
9415
    """Last minute lock declaration."""
9416
    # All nodes are locked anyway, so nothing to do here.
9417

    
9418
  def BuildHooksEnv(self):
9419
    """Build hooks env.
9420

9421
    This will run on the master, primary node and target node.
9422

9423
    """
9424
    env = {
9425
      "EXPORT_MODE": self.op.mode,
9426
      "EXPORT_NODE": self.op.target_node,
9427
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9428
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9429
      # TODO: Generic function for boolean env variables
9430
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9431
      }
9432

    
9433
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9434

    
9435
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9436

    
9437
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9438
      nl.append(self.op.target_node)
9439

    
9440
    return env, nl, nl
9441

    
9442
  def CheckPrereq(self):
9443
    """Check prerequisites.
9444

9445
    This checks that the instance and node names are valid.
9446

9447
    """
9448
    instance_name = self.op.instance_name
9449

    
9450
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9451
    assert self.instance is not None, \
9452
          "Cannot retrieve locked instance %s" % self.op.instance_name
9453
    _CheckNodeOnline(self, self.instance.primary_node)
9454

    
9455
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9456
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9457
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9458
      assert self.dst_node is not None
9459

    
9460
      _CheckNodeOnline(self, self.dst_node.name)
9461
      _CheckNodeNotDrained(self, self.dst_node.name)
9462

    
9463
      self._cds = None
9464
      self.dest_disk_info = None
9465
      self.dest_x509_ca = None
9466

    
9467
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9468
      self.dst_node = None
9469

    
9470
      if len(self.op.target_node) != len(self.instance.disks):
9471
        raise errors.OpPrereqError(("Received destination information for %s"
9472
                                    " disks, but instance %s has %s disks") %
9473
                                   (len(self.op.target_node), instance_name,
9474
                                    len(self.instance.disks)),
9475
                                   errors.ECODE_INVAL)
9476

    
9477
      cds = _GetClusterDomainSecret()
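      # the cluster domain secret is used to verify both the HMAC on the X509
      # key name and the signature on the destination CA supplied by the
      # client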
9478

    
9479
      # Check X509 key name
9480
      try:
9481
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9482
      except (TypeError, ValueError), err:
9483
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9484

    
9485
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9486
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9487
                                   errors.ECODE_INVAL)
9488

    
9489
      # Load and verify CA
9490
      try:
9491
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9492
      except OpenSSL.crypto.Error, err:
9493
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9494
                                   (err, ), errors.ECODE_INVAL)
9495

    
9496
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9497
      if errcode is not None:
9498
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9499
                                   (msg, ), errors.ECODE_INVAL)
9500

    
9501
      self.dest_x509_ca = cert
9502

    
9503
      # Verify target information
9504
      disk_info = []
9505
      for idx, disk_data in enumerate(self.op.target_node):
9506
        try:
9507
          (host, port, magic) = \
9508
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9509
        except errors.GenericError, err:
9510
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9511
                                     (idx, err), errors.ECODE_INVAL)
9512

    
9513
        disk_info.append((host, port, magic))
9514

    
9515
      assert len(disk_info) == len(self.op.target_node)
9516
      self.dest_disk_info = disk_info
9517

    
9518
    else:
9519
      raise errors.ProgrammerError("Unhandled export mode %r" %
9520
                                   self.op.mode)
9521

    
9522
    # instance disk type verification
9523
    # TODO: Implement export support for file-based disks
9524
    for disk in self.instance.disks:
9525
      if disk.dev_type == constants.LD_FILE:
9526
        raise errors.OpPrereqError("Export not supported for instances with"
9527
                                   " file-based disks", errors.ECODE_INVAL)
9528

    
9529
  def _CleanupExports(self, feedback_fn):
9530
    """Removes exports of current instance from all other nodes.
9531

9532
    If an instance in a cluster with nodes A..D was exported to node C, its
9533
    exports will be removed from the nodes A, B and D.
9534

9535
    """
9536
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9537

    
9538
    nodelist = self.cfg.GetNodeList()
9539
    nodelist.remove(self.dst_node.name)
9540

    
9541
    # on one-node clusters nodelist will be empty after the removal
9542
    # if we proceed the backup would be removed because OpQueryExports
9543
    # substitutes an empty list with the full cluster node list.
9544
    iname = self.instance.name
9545
    if nodelist:
9546
      feedback_fn("Removing old exports for instance %s" % iname)
9547
      exportlist = self.rpc.call_export_list(nodelist)
9548
      for node in exportlist:
9549
        if exportlist[node].fail_msg:
9550
          continue
9551
        if iname in exportlist[node].payload:
9552
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9553
          if msg:
9554
            self.LogWarning("Could not remove older export for instance %s"
9555
                            " on node %s: %s", iname, node, msg)
9556

    
9557
  def Exec(self, feedback_fn):
9558
    """Export an instance to an image in the cluster.
9559

9560
    """
9561
    assert self.op.mode in constants.EXPORT_MODES
9562

    
9563
    instance = self.instance
9564
    src_node = instance.primary_node
9565

    
9566
    if self.op.shutdown:
9567
      # shutdown the instance, but not the disks
9568
      feedback_fn("Shutting down instance %s" % instance.name)
9569
      result = self.rpc.call_instance_shutdown(src_node, instance,
9570
                                               self.op.shutdown_timeout)
9571
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9572
      result.Raise("Could not shutdown instance %s on"
9573
                   " node %s" % (instance.name, src_node))
9574

    
9575
    # set the disks ID correctly since call_instance_start needs the
9576
    # correct drbd minor to create the symlinks
9577
    for disk in instance.disks:
9578
      self.cfg.SetDiskID(disk, src_node)
9579

    
9580
    activate_disks = (not instance.admin_up)
9581

    
9582
    if activate_disks:
9583
      # Activate the instance disks if we're exporting a stopped instance
9584
      feedback_fn("Activating disks for %s" % instance.name)
9585
      _StartInstanceDisks(self, instance, None)
9586

    
9587
    try:
9588
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9589
                                                     instance)
9590

    
9591
      helper.CreateSnapshots()
9592
      try:
9593
        if (self.op.shutdown and instance.admin_up and
9594
            not self.op.remove_instance):
9595
          assert not activate_disks
9596
          feedback_fn("Starting instance %s" % instance.name)
9597
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9598
          msg = result.fail_msg
9599
          if msg:
9600
            feedback_fn("Failed to start instance: %s" % msg)
9601
            _ShutdownInstanceDisks(self, instance)
9602
            raise errors.OpExecError("Could not start instance: %s" % msg)
9603

    
9604
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9605
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9606
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9607
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9608
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9609

    
9610
          (key_name, _, _) = self.x509_key_name
9611

    
9612
          dest_ca_pem = \
9613
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9614
                                            self.dest_x509_ca)
9615

    
9616
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9617
                                                     key_name, dest_ca_pem,
9618
                                                     timeouts)
9619
      finally:
9620
        helper.Cleanup()
9621

    
9622
      # Check for backwards compatibility
9623
      assert len(dresults) == len(instance.disks)
9624
      assert compat.all(isinstance(i, bool) for i in dresults), \
9625
             "Not all results are boolean: %r" % dresults
9626

    
9627
    finally:
9628
      if activate_disks:
9629
        feedback_fn("Deactivating disks for %s" % instance.name)
9630
        _ShutdownInstanceDisks(self, instance)
9631

    
9632
    if not (compat.all(dresults) and fin_resu):
9633
      failures = []
9634
      if not fin_resu:
9635
        failures.append("export finalization")
9636
      if not compat.all(dresults):
9637
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9638
                               if not dsk)
9639
        failures.append("disk export: disk(s) %s" % fdsk)
9640

    
9641
      raise errors.OpExecError("Export failed, errors in %s" %
9642
                               utils.CommaJoin(failures))
9643

    
9644
    # At this point, the export was successful, we can cleanup/finish
9645

    
9646
    # Remove instance if requested
9647
    if self.op.remove_instance:
9648
      feedback_fn("Removing instance %s" % instance.name)
9649
      _RemoveInstance(self, feedback_fn, instance,
9650
                      self.op.ignore_remove_failures)
9651

    
9652
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9653
      self._CleanupExports(feedback_fn)
9654

    
9655
    return fin_resu, dresults
9656

    
9657

    
9658
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
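
  # For example, with a hypothetical pattern such as "^web", the returned
  # list of (path, tag) pairs could look like (object names made up):
  #   [("/cluster", "web-cluster"),
  #    ("/instances/inst1.example.com", "web-frontend")]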


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
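
  # A minimal sketch of the client side of this handshake, assuming the
  # client has learned the socket path from the ELOG_JQUEUE_TEST log entry
  # emitted by _SendNotification below; it connects within
  # _CLIENT_CONNECT_TIMEOUT and then sends a byte (or closes the connection)
  # to unblock conn.recv(1) above:
  #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   client.connect(sockname)   # unblocks sock.accept() above
  #   ...                        # client-side checks happen here
  #   client.send("\0")          # confirms the notification
  #   client.close()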

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text) that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
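
  # For illustration (mirroring the calls in LUTestAllocator.Exec below, with
  # made-up object names), a relocation request would be built and run
  # roughly like this:
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")  # name of an installed iallocator script
  #   if not ial.success:
  #     ...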

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
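
  # Sketch of what self.in_data looks like once the request has been added
  # (values elided; the node and instance members are built in
  # _ComputeClusterData above):
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {"node1.example.com": {"total_memory": ..., ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": ..., ...}, ...},
  #    "request": {"type": <one of the IALLOCATOR_MODE_* values>, ...}}
  # where the other "request" members depend on the mode and are built by
  # one of the _Add*() methods above.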

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
10379
      rdict["result"] = rdict["nodes"]
10380
      del rdict["nodes"]
10381

    
10382
    for key in "success", "info", "result":
10383
      if key not in rdict:
10384
        raise errors.OpExecError("Can't parse iallocator results:"
10385
                                 " missing key '%s'" % key)
10386
      setattr(self, key, rdict[key])
10387

    
10388
    if not isinstance(rdict["result"], list):
10389
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10390
                               " is not a list")
10391
    self.out_data = rdict
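
  # A well-formed reply from the allocator script is therefore a JSON
  # document along the lines of (node names are only an example):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
  # Older scripts may still return the node list under a "nodes" key, which
  # is renamed to "result" above.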


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

10423
    """
10424
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10425
      for attr in ["mem_size", "disks", "disk_template",
10426
                   "os", "tags", "nics", "vcpus"]:
10427
        if not hasattr(self.op, attr):
10428
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10429
                                     attr, errors.ECODE_INVAL)
10430
      iname = self.cfg.ExpandInstanceName(self.op.name)
10431
      if iname is not None:
10432
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10433
                                   iname, errors.ECODE_EXISTS)
10434
      if not isinstance(self.op.nics, list):
10435
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10436
                                   errors.ECODE_INVAL)
10437
      if not isinstance(self.op.disks, list):
10438
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10439
                                   errors.ECODE_INVAL)
10440
      for row in self.op.disks:
10441
        if (not isinstance(row, dict) or
10442
            "size" not in row or
10443
            not isinstance(row["size"], int) or
10444
            "mode" not in row or
10445
            row["mode"] not in ['r', 'w']):
10446
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10447
                                     " parameter", errors.ECODE_INVAL)
10448
      if self.op.hypervisor is None:
10449
        self.op.hypervisor = self.cfg.GetHypervisorType()
10450
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10451
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10452
      self.op.name = fname
10453
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10454
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10455
      if not hasattr(self.op, "evac_nodes"):
10456
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10457
                                   " opcode input", errors.ECODE_INVAL)
10458
    else:
10459
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10460
                                 self.op.mode, errors.ECODE_INVAL)
10461

    
10462
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10463
      if self.op.allocator is None:
10464
        raise errors.OpPrereqError("Missing allocator name",
10465
                                   errors.ECODE_INVAL)
10466
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10467
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10468
                                 self.op.direction, errors.ECODE_INVAL)
10469

    
10470
  def Exec(self, feedback_fn):
10471
    """Run the allocator test.
10472

10473
    """
10474
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10475
      ial = IAllocator(self.cfg, self.rpc,
10476
                       mode=self.op.mode,
10477
                       name=self.op.name,
10478
                       mem_size=self.op.mem_size,
10479
                       disks=self.op.disks,
10480
                       disk_template=self.op.disk_template,
10481
                       os=self.op.os,
10482
                       tags=self.op.tags,
10483
                       nics=self.op.nics,
10484
                       vcpus=self.op.vcpus,
10485
                       hypervisor=self.op.hypervisor,
10486
                       )
10487
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10488
      ial = IAllocator(self.cfg, self.rpc,
10489
                       mode=self.op.mode,
10490
                       name=self.op.name,
10491
                       relocate_from=list(self.relocate_from),
10492
                       )
10493
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10494
      ial = IAllocator(self.cfg, self.rpc,
10495
                       mode=self.op.mode,
10496
                       evac_nodes=self.op.evac_nodes)
10497
    else:
10498
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
10500

    
10501
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
10502
      result = ial.in_text
10503
    else:
10504
      ial.Run(self.op.allocator, validate=False)
10505
      result = ial.out_text
10506
    return result