
lib/cmdlib.py @ f99010b2


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56

    
57
import ganeti.masterd.instance # pylint: disable-msg=W0611
58

    
59

    
60
# Modifiable default values; need to define these here before the
61
# actual LUs
62

    
63
def _EmptyList():
64
  """Returns an empty list.
65

66
  """
67
  return []
68

    
69

    
70
def _EmptyDict():
71
  """Returns an empty dict.
72

73
  """
74
  return {}
75

    
76

    
77
#: The without-default default value
78
_NoDefault = object()
79

    
80

    
81
#: The no-type (value too complex to check in the type system)
82
_NoType = object()
83

    
84

    
85
# Some basic types
86
def _TNotNone(val):
87
  """Checks if the given value is not None.
88

89
  """
90
  return val is not None
91

    
92

    
93
def _TNone(val):
94
  """Checks if the given value is None.
95

96
  """
97
  return val is None
98

    
99

    
100
def _TBool(val):
101
  """Checks if the given value is a boolean.
102

103
  """
104
  return isinstance(val, bool)
105

    
106

    
107
def _TInt(val):
108
  """Checks if the given value is an integer.
109

110
  """
111
  return isinstance(val, int)
112

    
113

    
114
def _TFloat(val):
115
  """Checks if the given value is a float.
116

117
  """
118
  return isinstance(val, float)
119

    
120

    
121
def _TString(val):
122
  """Checks if the given value is a string.
123

124
  """
125
  return isinstance(val, basestring)
126

    
127

    
128
def _TTrue(val):
129
  """Checks if a given value evaluates to a boolean True value.
130

131
  """
132
  return bool(val)
133

    
134

    
135
def _TElemOf(target_list):
136
  """Builds a function that checks if a given value is a member of a list.
137

138
  """
139
  return lambda val: val in target_list
140

    
141

    
142
# Container types
143
def _TList(val):
144
  """Checks if the given value is a list.
145

146
  """
147
  return isinstance(val, list)
148

    
149

    
150
def _TDict(val):
151
  """Checks if the given value is a dictionary.
152

153
  """
154
  return isinstance(val, dict)
155

    
156

    
157
# Combinator types
158
def _TAnd(*args):
159
  """Combine multiple functions using an AND operation.
160

161
  """
162
  def fn(val):
163
    return compat.all(t(val) for t in args)
164
  return fn
165

    
166

    
167
def _TOr(*args):
168
  """Combine multiple functions using an AND operation.
169

170
  """
171
  def fn(val):
172
    return compat.any(t(val) for t in args)
173
  return fn
174

    
175

    
176
# Type aliases
177

    
178
#: a non-empty string
179
_TNonEmptyString = _TAnd(_TString, _TTrue)
180

    
181

    
182
#: a maybe non-empty string
183
_TMaybeString = _TOr(_TNonEmptyString, _TNone)
184

    
185

    
186
#: a maybe boolean (bool or none)
187
_TMaybeBool = _TOr(_TBool, _TNone)
188

    
189

    
190
#: a positive integer
191
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
192

    
193
#: a strictly positive integer
194
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
195

    
196

    
197
def _TListOf(my_type):
198
  """Checks if a given value is a list with all elements of the same type.
199

200
  """
201
  return _TAnd(_TList,
202
               lambda lst: compat.all(my_type(v) for v in lst))
203

    
204

    
205
def _TDictOf(key_type, val_type):
206
  """Checks a dict type for the type of its key/values.
207

208
  """
209
  return _TAnd(_TDict,
210
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
211
                                and compat.all(val_type(v)
212
                                               for v in my_dict.values())))
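# Illustrative sketch, not part of the original module: these checkers are
# plain callables and therefore compose directly, e.g.:
#
#   _TListOf(_TNonEmptyString)(["node1", "node2"])            # -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])                 # -> False
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"count": 2})   # -> True
#   _TOr(_TNone, _TElemOf(["plain", "drbd"]))(None)           # -> True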
213

    
214

    
215
# Common opcode attributes
216

    
217
#: output fields for a query operation
218
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
219

    
220

    
221
#: the shutdown timeout
222
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
223
                     _TPositiveInt)
224

    
225
#: the force parameter
226
_PForce = ("force", False, _TBool)
227

    
228
#: a required instance name (for single-instance LUs)
229
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
230

    
231

    
232
#: a required node name (for single-node LUs)
233
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
234

    
235
#: the migration type (live/non-live)
236
_PMigrationMode = ("mode", None, _TOr(_TNone,
237
                                      _TElemOf(constants.HT_MIGRATION_MODES)))
238

    
239
#: the obsolete 'live' mode (boolean)
240
_PMigrationLive = ("live", None, _TMaybeBool)
241

    
242

    
243
# End types
244
class LogicalUnit(object):
245
  """Logical Unit base class.
246

247
  Subclasses must follow these rules:
248
    - implement ExpandNames
249
    - implement CheckPrereq (except when tasklets are used)
250
    - implement Exec (except when tasklets are used)
251
    - implement BuildHooksEnv
252
    - redefine HPATH and HTYPE
253
    - optionally redefine their run requirements:
254
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
255

256
  Note that all commands require root permissions.
257

258
  @ivar dry_run_result: the value (if any) that will be returned to the caller
259
      in dry-run mode (signalled by opcode dry_run parameter)
260
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
261
      they should get if not already defined, and types they must match
262

263
  """
264
  HPATH = None
265
  HTYPE = None
266
  _OP_PARAMS = []
267
  REQ_BGL = True
268

    
269
  def __init__(self, processor, op, context, rpc):
270
    """Constructor for LogicalUnit.
271

272
    This needs to be overridden in derived classes in order to check op
273
    validity.
274

275
    """
276
    self.proc = processor
277
    self.op = op
278
    self.cfg = context.cfg
279
    self.context = context
280
    self.rpc = rpc
281
    # Dicts used to declare locking needs to mcpu
282
    self.needed_locks = None
283
    self.acquired_locks = {}
284
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
285
    self.add_locks = {}
286
    self.remove_locks = {}
287
    # Used to force good behavior when calling helper functions
288
    self.recalculate_locks = {}
289
    self.__ssh = None
290
    # logging
291
    self.Log = processor.Log # pylint: disable-msg=C0103
292
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
293
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
294
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
295
    # support for dry-run
296
    self.dry_run_result = None
297
    # support for generic debug attribute
298
    if (not hasattr(self.op, "debug_level") or
299
        not isinstance(self.op.debug_level, int)):
300
      self.op.debug_level = 0
301

    
302
    # Tasklets
303
    self.tasklets = None
304

    
305
    # The new kind-of-type-system
306
    op_id = self.op.OP_ID
307
    for attr_name, aval, test in self._OP_PARAMS:
308
      if not hasattr(op, attr_name):
309
        if aval == _NoDefault:
310
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
311
                                     (op_id, attr_name), errors.ECODE_INVAL)
312
        else:
313
          if callable(aval):
314
            dval = aval()
315
          else:
316
            dval = aval
317
          setattr(self.op, attr_name, dval)
318
      attr_val = getattr(op, attr_name)
319
      if test == _NoType:
320
        # no tests here
321
        continue
322
      if not callable(test):
323
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
324
                                     " given type is not a proper type (%s)" %
325
                                     (op_id, attr_name, test))
326
      if not test(attr_val):
327
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
328
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
329
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
330
                                   (op_id, attr_name), errors.ECODE_INVAL)
331

    
332
    self.CheckArguments()
333

    
334
  def __GetSSH(self):
335
    """Returns the SshRunner object
336

337
    """
338
    if not self.__ssh:
339
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
340
    return self.__ssh
341

    
342
  ssh = property(fget=__GetSSH)
343

    
344
  def CheckArguments(self):
345
    """Check syntactic validity for the opcode arguments.
346

347
    This method is for doing a simple syntactic check and ensuring the
348
    validity of opcode parameters, without any cluster-related
349
    checks. While the same can be accomplished in ExpandNames and/or
350
    CheckPrereq, doing these separately is better because:
351

352
      - ExpandNames is left as purely a lock-related function
353
      - CheckPrereq is run after we have acquired locks (and possibly
354
        waited for them)
355

356
    The function is allowed to change the self.op attribute so that
357
    later methods no longer need to worry about missing parameters.
358

359
    """
360
    pass
361

    
362
  def ExpandNames(self):
363
    """Expand names for this LU.
364

365
    This method is called before starting to execute the opcode, and it should
366
    update all the parameters of the opcode to their canonical form (e.g. a
367
    short node name must be fully expanded after this method has successfully
368
    completed). This way locking, hooks, logging, ecc. can work correctly.
369

370
    LUs which implement this method must also populate the self.needed_locks
371
    member, as a dict with lock levels as keys, and a list of needed lock names
372
    as values. Rules:
373

374
      - use an empty dict if you don't need any lock
375
      - if you don't need any lock at a particular level omit that level
376
      - don't put anything for the BGL level
377
      - if you want all locks at a level use locking.ALL_SET as a value
378

379
    If you need to share locks (rather than acquire them exclusively) at one
380
    level you can modify self.share_locks, setting a true value (usually 1) for
381
    that level. By default locks are not shared.
382

383
    This function can also define a list of tasklets, which then will be
384
    executed in order instead of the usual LU-level CheckPrereq and Exec
385
    functions, if those are not defined by the LU.
386

387
    Examples::
388

389
      # Acquire all nodes and one instance
390
      self.needed_locks = {
391
        locking.LEVEL_NODE: locking.ALL_SET,
392
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
393
      }
394
      # Acquire just two nodes
395
      self.needed_locks = {
396
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
397
      }
398
      # Acquire no locks
399
      self.needed_locks = {} # No, you can't leave it to the default value None
400

401
    """
402
    # The implementation of this method is mandatory only if the new LU is
403
    # concurrent, so that old LUs don't need to be changed all at the same
404
    # time.
405
    if self.REQ_BGL:
406
      self.needed_locks = {} # Exclusive LUs don't need locks.
407
    else:
408
      raise NotImplementedError
409

    
410
  def DeclareLocks(self, level):
411
    """Declare LU locking needs for a level
412

413
    While most LUs can just declare their locking needs at ExpandNames time,
414
    sometimes there's the need to calculate some locks after having acquired
415
    the ones before. This function is called just before acquiring locks at a
416
    particular level, but after acquiring the ones at lower levels, and permits
417
    such calculations. It can be used to modify self.needed_locks, and by
418
    default it does nothing.
419

420
    This function is only called if you have something already set in
421
    self.needed_locks for the level.
422

423
    @param level: Locking level which is going to be locked
424
    @type level: member of ganeti.locking.LEVELS
425

426
    """
427

    
428
  def CheckPrereq(self):
429
    """Check prerequisites for this LU.
430

431
    This method should check that the prerequisites for the execution
432
    of this LU are fulfilled. It can do internode communication, but
433
    it should be idempotent - no cluster or system changes are
434
    allowed.
435

436
    The method should raise errors.OpPrereqError in case something is
437
    not fulfilled. Its return value is ignored.
438

439
    This method should also update all the parameters of the opcode to
440
    their canonical form if it hasn't been done by ExpandNames before.
441

442
    """
443
    if self.tasklets is not None:
444
      for (idx, tl) in enumerate(self.tasklets):
445
        logging.debug("Checking prerequisites for tasklet %s/%s",
446
                      idx + 1, len(self.tasklets))
447
        tl.CheckPrereq()
448
    else:
449
      pass
450

    
451
  def Exec(self, feedback_fn):
452
    """Execute the LU.
453

454
    This method should implement the actual work. It should raise
455
    errors.OpExecError for failures that are somewhat dealt with in
456
    code, or expected.
457

458
    """
459
    if self.tasklets is not None:
460
      for (idx, tl) in enumerate(self.tasklets):
461
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
462
        tl.Exec(feedback_fn)
463
    else:
464
      raise NotImplementedError
465

    
466
  def BuildHooksEnv(self):
467
    """Build hooks environment for this LU.
468

469
    This method should return a three-node tuple consisting of: a dict
470
    containing the environment that will be used for running the
471
    specific hook for this LU, a list of node names on which the hook
472
    should run before the execution, and a list of node names on which
473
    the hook should run after the execution.
474

475
    The keys of the dict must not have 'GANETI_' prefixed as this will
476
    be handled in the hooks runner. Also note additional keys will be
477
    added by the hooks runner. If the LU doesn't define any
478
    environment, an empty dict (and not None) should be returned.
479

480
    No nodes should be returned as an empty list (and not None).
481

482
    Note that if the HPATH for a LU class is None, this function will
483
    not be called.
484

485
    """
486
    raise NotImplementedError
487

    
488
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
489
    """Notify the LU about the results of its hooks.
490

491
    This method is called every time a hooks phase is executed, and notifies
492
    the Logical Unit about the hooks' result. The LU can then use it to alter
493
    its result based on the hooks.  By default the method does nothing and the
494
    previous result is passed back unchanged but any LU can define it if it
495
    wants to use the local cluster hook-scripts somehow.
496

497
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
498
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
499
    @param hook_results: the results of the multi-node hooks rpc call
500
    @param feedback_fn: function used to send feedback back to the caller
501
    @param lu_result: the previous Exec result this LU had, or None
502
        in the PRE phase
503
    @return: the new Exec result, based on the previous result
504
        and hook results
505

506
    """
507
    # API must be kept, thus we ignore the unused-argument and
508
    # could-be-a-function warnings
509
    # pylint: disable-msg=W0613,R0201
510
    return lu_result
511

    
512
  def _ExpandAndLockInstance(self):
513
    """Helper function to expand and lock an instance.
514

515
    Many LUs that work on an instance take its name in self.op.instance_name
516
    and need to expand it and then declare the expanded name for locking. This
517
    function does it, and then updates self.op.instance_name to the expanded
518
    name. It also initializes needed_locks as a dict, if this hasn't been done
519
    before.
520

521
    """
522
    if self.needed_locks is None:
523
      self.needed_locks = {}
524
    else:
525
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
526
        "_ExpandAndLockInstance called with instance-level locks set"
527
    self.op.instance_name = _ExpandInstanceName(self.cfg,
528
                                                self.op.instance_name)
529
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
530

    
531
  def _LockInstancesNodes(self, primary_only=False):
532
    """Helper function to declare instances' nodes for locking.
533

534
    This function should be called after locking one or more instances to lock
535
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
536
    with all primary or secondary nodes for instances already locked and
537
    present in self.needed_locks[locking.LEVEL_INSTANCE].
538

539
    It should be called from DeclareLocks, and for safety only works if
540
    self.recalculate_locks[locking.LEVEL_NODE] is set.
541

542
    In the future it may grow parameters to just lock some instances' nodes, or
543
    to just lock primaries or secondary nodes, if needed.
544

545
    It should be called in DeclareLocks in a way similar to::
546

547
      if level == locking.LEVEL_NODE:
548
        self._LockInstancesNodes()
549

550
    @type primary_only: boolean
551
    @param primary_only: only lock primary nodes of locked instances
552

553
    """
554
    assert locking.LEVEL_NODE in self.recalculate_locks, \
555
      "_LockInstancesNodes helper function called with no nodes to recalculate"
556

    
557
    # TODO: check if we've really been called with the instance locks held
558

    
559
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
560
    # future we might want to have different behaviors depending on the value
561
    # of self.recalculate_locks[locking.LEVEL_NODE]
562
    wanted_nodes = []
563
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
564
      instance = self.context.cfg.GetInstanceInfo(instance_name)
565
      wanted_nodes.append(instance.primary_node)
566
      if not primary_only:
567
        wanted_nodes.extend(instance.secondary_nodes)
568

    
569
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
570
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
571
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
572
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
573

    
574
    del self.recalculate_locks[locking.LEVEL_NODE]
575

    
576

    
577
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
578
  """Simple LU which runs no hooks.
579

580
  This LU is intended as a parent for other LogicalUnits which will
581
  run no hooks, in order to reduce duplicate code.
582

583
  """
584
  HPATH = None
585
  HTYPE = None
586

    
587
  def BuildHooksEnv(self):
588
    """Empty BuildHooksEnv for NoHooksLu.
589

590
    This just raises an error.
591

592
    """
593
    assert False, "BuildHooksEnv called for NoHooksLUs"
594

    
595

    
596
class Tasklet:
597
  """Tasklet base class.
598

599
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
600
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
601
  tasklets know nothing about locks.
602

603
  Subclasses must follow these rules:
604
    - Implement CheckPrereq
605
    - Implement Exec
606

607
  """
608
  def __init__(self, lu):
609
    self.lu = lu
610

    
611
    # Shortcuts
612
    self.cfg = lu.cfg
613
    self.rpc = lu.rpc
614

    
615
  def CheckPrereq(self):
616
    """Check prerequisites for this tasklets.
617

618
    This method should check whether the prerequisites for the execution of
619
    this tasklet are fulfilled. It can do internode communication, but it
620
    should be idempotent - no cluster or system changes are allowed.
621

622
    The method should raise errors.OpPrereqError in case something is not
623
    fulfilled. Its return value is ignored.
624

625
    This method should also update all parameters to their canonical form if it
626
    hasn't been done before.
627

628
    """
629
    pass
630

    
631
  def Exec(self, feedback_fn):
632
    """Execute the tasklet.
633

634
    This method should implement the actual work. It should raise
635
    errors.OpExecError for failures that are somewhat dealt with in code, or
636
    expected.
637

638
    """
639
    raise NotImplementedError
640

    
641

    
642
def _GetWantedNodes(lu, nodes):
643
  """Returns list of checked and expanded node names.
644

645
  @type lu: L{LogicalUnit}
646
  @param lu: the logical unit on whose behalf we execute
647
  @type nodes: list
648
  @param nodes: list of node names or None for all nodes
649
  @rtype: list
650
  @return: the list of nodes, sorted
651
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
652

653
  """
654
  if not nodes:
655
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
656
      " non-empty list of nodes whose name is to be expanded.")
657

    
658
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
659
  return utils.NiceSort(wanted)
660

    
661

    
662
def _GetWantedInstances(lu, instances):
663
  """Returns list of checked and expanded instance names.
664

665
  @type lu: L{LogicalUnit}
666
  @param lu: the logical unit on whose behalf we execute
667
  @type instances: list
668
  @param instances: list of instance names or None for all instances
669
  @rtype: list
670
  @return: the list of instances, sorted
671
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
672
  @raise errors.OpPrereqError: if any of the passed instances is not found
673

674
  """
675
  if instances:
676
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
677
  else:
678
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
679
  return wanted
680

    
681

    
682
def _GetUpdatedParams(old_params, update_dict,
683
                      use_default=True, use_none=False):
684
  """Return the new version of a parameter dictionary.
685

686
  @type old_params: dict
687
  @param old_params: old parameters
688
  @type update_dict: dict
689
  @param update_dict: dict containing new parameter values, or
690
      constants.VALUE_DEFAULT to reset the parameter to its default
691
      value
692
  @param use_default: boolean
693
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
694
      values as 'to be deleted' values
695
  @param use_none: boolean
696
  @type use_none: whether to recognise C{None} values as 'to be
697
      deleted' values
698
  @rtype: dict
699
  @return: the new parameter dictionary
700

701
  """
702
  params_copy = copy.deepcopy(old_params)
703
  for key, val in update_dict.iteritems():
704
    if ((use_default and val == constants.VALUE_DEFAULT) or
705
        (use_none and val is None)):
706
      try:
707
        del params_copy[key]
708
      except KeyError:
709
        pass
710
    else:
711
      params_copy[key] = val
712
  return params_copy
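# Illustrative sketch, not part of the original module: with use_default=True,
# constants.VALUE_DEFAULT drops a key so it falls back to the cluster default:
#
#   _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                     {"kernel_path": constants.VALUE_DEFAULT,
#                      "serial_console": False})
#   # -> {"acpi": True, "serial_console": False}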
713

    
714

    
715
def _CheckOutputFields(static, dynamic, selected):
716
  """Checks whether all selected fields are valid.
717

718
  @type static: L{utils.FieldSet}
719
  @param static: static fields set
720
  @type dynamic: L{utils.FieldSet}
721
  @param dynamic: dynamic fields set
722

723
  """
724
  f = utils.FieldSet()
725
  f.Extend(static)
726
  f.Extend(dynamic)
727

    
728
  delta = f.NonMatching(selected)
729
  if delta:
730
    raise errors.OpPrereqError("Unknown output fields selected: %s"
731
                               % ",".join(delta), errors.ECODE_INVAL)
732

    
733

    
734
def _CheckGlobalHvParams(params):
735
  """Validates that given hypervisor params are not global ones.
736

737
  This will ensure that instances don't get customised versions of
738
  global params.
739

740
  """
741
  used_globals = constants.HVC_GLOBALS.intersection(params)
742
  if used_globals:
743
    msg = ("The following hypervisor parameters are global and cannot"
744
           " be customized at instance level, please modify them at"
745
           " cluster level: %s" % utils.CommaJoin(used_globals))
746
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
747

    
748

    
749
def _CheckNodeOnline(lu, node):
750
  """Ensure that a given node is online.
751

752
  @param lu: the LU on behalf of which we make the check
753
  @param node: the node to check
754
  @raise errors.OpPrereqError: if the node is offline
755

756
  """
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("Can't use offline node %s" % node,
759
                               errors.ECODE_INVAL)
760

    
761

    
762
def _CheckNodeNotDrained(lu, node):
763
  """Ensure that a given node is not drained.
764

765
  @param lu: the LU on behalf of which we make the check
766
  @param node: the node to check
767
  @raise errors.OpPrereqError: if the node is drained
768

769
  """
770
  if lu.cfg.GetNodeInfo(node).drained:
771
    raise errors.OpPrereqError("Can't use drained node %s" % node,
772
                               errors.ECODE_INVAL)
773

    
774

    
775
def _CheckNodeHasOS(lu, node, os_name, force_variant):
776
  """Ensure that a node supports a given OS.
777

778
  @param lu: the LU on behalf of which we make the check
779
  @param node: the node to check
780
  @param os_name: the OS to query about
781
  @param force_variant: whether to ignore variant errors
782
  @raise errors.OpPrereqError: if the node does not support the OS
783

784
  """
785
  result = lu.rpc.call_os_get(node, os_name)
786
  result.Raise("OS '%s' not in supported OS list for node %s" %
787
               (os_name, node),
788
               prereq=True, ecode=errors.ECODE_INVAL)
789
  if not force_variant:
790
    _CheckOSVariant(result.payload, os_name)
791

    
792

    
793
def _RequireFileStorage():
794
  """Checks that file storage is enabled.
795

796
  @raise errors.OpPrereqError: when file storage is disabled
797

798
  """
799
  if not constants.ENABLE_FILE_STORAGE:
800
    raise errors.OpPrereqError("File storage disabled at configure time",
801
                               errors.ECODE_INVAL)
802

    
803

    
804
def _CheckDiskTemplate(template):
805
  """Ensure a given disk template is valid.
806

807
  """
808
  if template not in constants.DISK_TEMPLATES:
809
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
810
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
811
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
812
  if template == constants.DT_FILE:
813
    _RequireFileStorage()
814
  return True
815

    
816

    
817
def _CheckStorageType(storage_type):
818
  """Ensure a given storage type is valid.
819

820
  """
821
  if storage_type not in constants.VALID_STORAGE_TYPES:
822
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
823
                               errors.ECODE_INVAL)
824
  if storage_type == constants.ST_FILE:
825
    _RequireFileStorage()
826
  return True
827

    
828

    
829
def _GetClusterDomainSecret():
830
  """Reads the cluster domain secret.
831

832
  """
833
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
834
                               strict=True)
835

    
836

    
837
def _CheckInstanceDown(lu, instance, reason):
838
  """Ensure that an instance is not running."""
839
  if instance.admin_up:
840
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
841
                               (instance.name, reason), errors.ECODE_STATE)
842

    
843
  pnode = instance.primary_node
844
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
845
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
846
              prereq=True, ecode=errors.ECODE_ENVIRON)
847

    
848
  if instance.name in ins_l.payload:
849
    raise errors.OpPrereqError("Instance %s is running, %s" %
850
                               (instance.name, reason), errors.ECODE_STATE)
851

    
852

    
853
def _ExpandItemName(fn, name, kind):
854
  """Expand an item name.
855

856
  @param fn: the function to use for expansion
857
  @param name: requested item name
858
  @param kind: text description ('Node' or 'Instance')
859
  @return: the resolved (full) name
860
  @raise errors.OpPrereqError: if the item is not found
861

862
  """
863
  full_name = fn(name)
864
  if full_name is None:
865
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
866
                               errors.ECODE_NOENT)
867
  return full_name
868

    
869

    
870
def _ExpandNodeName(cfg, name):
871
  """Wrapper over L{_ExpandItemName} for nodes."""
872
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
873

    
874

    
875
def _ExpandInstanceName(cfg, name):
876
  """Wrapper over L{_ExpandItemName} for instance."""
877
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
878

    
879

    
880
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
881
                          memory, vcpus, nics, disk_template, disks,
882
                          bep, hvp, hypervisor_name):
883
  """Builds instance related env variables for hooks
884

885
  This builds the hook environment from individual variables.
886

887
  @type name: string
888
  @param name: the name of the instance
889
  @type primary_node: string
890
  @param primary_node: the name of the instance's primary node
891
  @type secondary_nodes: list
892
  @param secondary_nodes: list of secondary nodes as strings
893
  @type os_type: string
894
  @param os_type: the name of the instance's OS
895
  @type status: boolean
896
  @param status: the should_run status of the instance
897
  @type memory: string
898
  @param memory: the memory size of the instance
899
  @type vcpus: string
900
  @param vcpus: the count of VCPUs the instance has
901
  @type nics: list
902
  @param nics: list of tuples (ip, mac, mode, link) representing
903
      the NICs the instance has
904
  @type disk_template: string
905
  @param disk_template: the disk template of the instance
906
  @type disks: list
907
  @param disks: the list of (size, mode) pairs
908
  @type bep: dict
909
  @param bep: the backend parameters for the instance
910
  @type hvp: dict
911
  @param hvp: the hypervisor parameters for the instance
912
  @type hypervisor_name: string
913
  @param hypervisor_name: the hypervisor for the instance
914
  @rtype: dict
915
  @return: the hook environment for this instance
916

917
  """
918
  if status:
919
    str_status = "up"
920
  else:
921
    str_status = "down"
922
  env = {
923
    "OP_TARGET": name,
924
    "INSTANCE_NAME": name,
925
    "INSTANCE_PRIMARY": primary_node,
926
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
927
    "INSTANCE_OS_TYPE": os_type,
928
    "INSTANCE_STATUS": str_status,
929
    "INSTANCE_MEMORY": memory,
930
    "INSTANCE_VCPUS": vcpus,
931
    "INSTANCE_DISK_TEMPLATE": disk_template,
932
    "INSTANCE_HYPERVISOR": hypervisor_name,
933
  }
934

    
935
  if nics:
936
    nic_count = len(nics)
937
    for idx, (ip, mac, mode, link) in enumerate(nics):
938
      if ip is None:
939
        ip = ""
940
      env["INSTANCE_NIC%d_IP" % idx] = ip
941
      env["INSTANCE_NIC%d_MAC" % idx] = mac
942
      env["INSTANCE_NIC%d_MODE" % idx] = mode
943
      env["INSTANCE_NIC%d_LINK" % idx] = link
944
      if mode == constants.NIC_MODE_BRIDGED:
945
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
946
  else:
947
    nic_count = 0
948

    
949
  env["INSTANCE_NIC_COUNT"] = nic_count
950

    
951
  if disks:
952
    disk_count = len(disks)
953
    for idx, (size, mode) in enumerate(disks):
954
      env["INSTANCE_DISK%d_SIZE" % idx] = size
955
      env["INSTANCE_DISK%d_MODE" % idx] = mode
956
  else:
957
    disk_count = 0
958

    
959
  env["INSTANCE_DISK_COUNT"] = disk_count
960

    
961
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
962
    for key, value in source.items():
963
      env["INSTANCE_%s_%s" % (kind, key)] = value
964

    
965
  return env
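# Illustrative sketch, not part of the original module: for an instance with a
# single bridged NIC and one disk, the resulting environment contains, among
# others, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE,
# INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE and INSTANCE_DISK0_MODE, plus one
# INSTANCE_BE_* entry per backend parameter and one INSTANCE_HV_* entry per
# hypervisor parameter.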
966

    
967

    
968
def _NICListToTuple(lu, nics):
969
  """Build a list of nic information tuples.
970

971
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
972
  value in LUQueryInstanceData.
973

974
  @type lu:  L{LogicalUnit}
975
  @param lu: the logical unit on whose behalf we execute
976
  @type nics: list of L{objects.NIC}
977
  @param nics: list of nics to convert to hooks tuples
978

979
  """
980
  hooks_nics = []
981
  cluster = lu.cfg.GetClusterInfo()
982
  for nic in nics:
983
    ip = nic.ip
984
    mac = nic.mac
985
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
986
    mode = filled_params[constants.NIC_MODE]
987
    link = filled_params[constants.NIC_LINK]
988
    hooks_nics.append((ip, mac, mode, link))
989
  return hooks_nics
990

    
991

    
992
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
993
  """Builds instance related env variables for hooks from an object.
994

995
  @type lu: L{LogicalUnit}
996
  @param lu: the logical unit on whose behalf we execute
997
  @type instance: L{objects.Instance}
998
  @param instance: the instance for which we should build the
999
      environment
1000
  @type override: dict
1001
  @param override: dictionary with key/values that will override
1002
      our values
1003
  @rtype: dict
1004
  @return: the hook environment dictionary
1005

1006
  """
1007
  cluster = lu.cfg.GetClusterInfo()
1008
  bep = cluster.FillBE(instance)
1009
  hvp = cluster.FillHV(instance)
1010
  args = {
1011
    'name': instance.name,
1012
    'primary_node': instance.primary_node,
1013
    'secondary_nodes': instance.secondary_nodes,
1014
    'os_type': instance.os,
1015
    'status': instance.admin_up,
1016
    'memory': bep[constants.BE_MEMORY],
1017
    'vcpus': bep[constants.BE_VCPUS],
1018
    'nics': _NICListToTuple(lu, instance.nics),
1019
    'disk_template': instance.disk_template,
1020
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1021
    'bep': bep,
1022
    'hvp': hvp,
1023
    'hypervisor_name': instance.hypervisor,
1024
  }
1025
  if override:
1026
    args.update(override)
1027
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1028

    
1029

    
1030
def _AdjustCandidatePool(lu, exceptions):
1031
  """Adjust the candidate pool after node operations.
1032

1033
  """
1034
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1035
  if mod_list:
1036
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1037
               utils.CommaJoin(node.name for node in mod_list))
1038
    for name in mod_list:
1039
      lu.context.ReaddNode(name)
1040
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1041
  if mc_now > mc_max:
1042
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1043
               (mc_now, mc_max))
1044

    
1045

    
1046
def _DecideSelfPromotion(lu, exceptions=None):
1047
  """Decide whether I should promote myself as a master candidate.
1048

1049
  """
1050
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1051
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052
  # the new node will increase mc_max by one, so:
1053
  mc_should = min(mc_should + 1, cp_size)
1054
  return mc_now < mc_should
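# Illustrative sketch, not part of the original module: with
# candidate_pool_size=10, mc_now=8 current candidates and mc_should=9 reported
# by GetMasterCandidateStats, the new node yields min(9 + 1, 10) = 10 > 8, so
# it decides to promote itself.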
1055

    
1056

    
1057
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1058
  """Check that the brigdes needed by a list of nics exist.
1059

1060
  """
1061
  cluster = lu.cfg.GetClusterInfo()
1062
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1063
  brlist = [params[constants.NIC_LINK] for params in paramslist
1064
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1065
  if brlist:
1066
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1067
    result.Raise("Error checking bridges on destination node '%s'" %
1068
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1069

    
1070

    
1071
def _CheckInstanceBridgesExist(lu, instance, node=None):
1072
  """Check that the brigdes needed by an instance exist.
1073

1074
  """
1075
  if node is None:
1076
    node = instance.primary_node
1077
  _CheckNicsBridgesExist(lu, instance.nics, node)
1078

    
1079

    
1080
def _CheckOSVariant(os_obj, name):
1081
  """Check whether an OS name conforms to the os variants specification.
1082

1083
  @type os_obj: L{objects.OS}
1084
  @param os_obj: OS object to check
1085
  @type name: string
1086
  @param name: OS name passed by the user, to check for validity
1087

1088
  """
1089
  if not os_obj.supported_variants:
1090
    return
1091
  try:
1092
    variant = name.split("+", 1)[1]
1093
  except IndexError:
1094
    raise errors.OpPrereqError("OS name must include a variant",
1095
                               errors.ECODE_INVAL)
1096

    
1097
  if variant not in os_obj.supported_variants:
1098
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1099

    
1100

    
1101
def _GetNodeInstancesInner(cfg, fn):
1102
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1103

    
1104

    
1105
def _GetNodeInstances(cfg, node_name):
1106
  """Returns a list of all primary and secondary instances on a node.
1107

1108
  """
1109

    
1110
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1111

    
1112

    
1113
def _GetNodePrimaryInstances(cfg, node_name):
1114
  """Returns primary instances on a node.
1115

1116
  """
1117
  return _GetNodeInstancesInner(cfg,
1118
                                lambda inst: node_name == inst.primary_node)
1119

    
1120

    
1121
def _GetNodeSecondaryInstances(cfg, node_name):
1122
  """Returns secondary instances on a node.
1123

1124
  """
1125
  return _GetNodeInstancesInner(cfg,
1126
                                lambda inst: node_name in inst.secondary_nodes)
1127

    
1128

    
1129
def _GetStorageTypeArgs(cfg, storage_type):
1130
  """Returns the arguments for a storage type.
1131

1132
  """
1133
  # Special case for file storage
1134
  if storage_type == constants.ST_FILE:
1135
    # storage.FileStorage wants a list of storage directories
1136
    return [[cfg.GetFileStorageDir()]]
1137

    
1138
  return []
1139

    
1140

    
1141
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1142
  faulty = []
1143

    
1144
  for dev in instance.disks:
1145
    cfg.SetDiskID(dev, node_name)
1146

    
1147
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1148
  result.Raise("Failed to get disk status from node %s" % node_name,
1149
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1150

    
1151
  for idx, bdev_status in enumerate(result.payload):
1152
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1153
      faulty.append(idx)
1154

    
1155
  return faulty
1156

    
1157

    
1158
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1159
  """Check the sanity of iallocator and node arguments and use the
1160
  cluster-wide iallocator if appropriate.
1161

1162
  Check that at most one of (iallocator, node) is specified. If none is
1163
  specified, then the LU's opcode's iallocator slot is filled with the
1164
  cluster-wide default iallocator.
1165

1166
  @type iallocator_slot: string
1167
  @param iallocator_slot: the name of the opcode iallocator slot
1168
  @type node_slot: string
1169
  @param node_slot: the name of the opcode target node slot
1170

1171
  """
1172
  node = getattr(lu.op, node_slot, None)
1173
  iallocator = getattr(lu.op, iallocator_slot, None)
1174

    
1175
  if node is not None and iallocator is not None:
1176
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1177
                               errors.ECODE_INVAL)
1178
  elif node is None and iallocator is None:
1179
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1180
    if default_iallocator:
1181
      setattr(lu.op, iallocator_slot, default_iallocator)
1182
    else:
1183
      raise errors.OpPrereqError("No iallocator or node given and no"
1184
                                 " cluster-wide default iallocator found."
1185
                                 " Please specify either an iallocator or a"
1186
                                 " node, or set a cluster-wide default"
1187
                                 " iallocator.")
1188

    
1189

    
1190
class LUPostInitCluster(LogicalUnit):
1191
  """Logical unit for running hooks after cluster initialization.
1192

1193
  """
1194
  HPATH = "cluster-init"
1195
  HTYPE = constants.HTYPE_CLUSTER
1196

    
1197
  def BuildHooksEnv(self):
1198
    """Build hooks env.
1199

1200
    """
1201
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1202
    mn = self.cfg.GetMasterNode()
1203
    return env, [], [mn]
1204

    
1205
  def Exec(self, feedback_fn):
1206
    """Nothing to do.
1207

1208
    """
1209
    return True
1210

    
1211

    
1212
class LUDestroyCluster(LogicalUnit):
1213
  """Logical unit for destroying the cluster.
1214

1215
  """
1216
  HPATH = "cluster-destroy"
1217
  HTYPE = constants.HTYPE_CLUSTER
1218

    
1219
  def BuildHooksEnv(self):
1220
    """Build hooks env.
1221

1222
    """
1223
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1224
    return env, [], []
1225

    
1226
  def CheckPrereq(self):
1227
    """Check prerequisites.
1228

1229
    This checks whether the cluster is empty.
1230

1231
    Any errors are signaled by raising errors.OpPrereqError.
1232

1233
    """
1234
    master = self.cfg.GetMasterNode()
1235

    
1236
    nodelist = self.cfg.GetNodeList()
1237
    if len(nodelist) != 1 or nodelist[0] != master:
1238
      raise errors.OpPrereqError("There are still %d node(s) in"
1239
                                 " this cluster." % (len(nodelist) - 1),
1240
                                 errors.ECODE_INVAL)
1241
    instancelist = self.cfg.GetInstanceList()
1242
    if instancelist:
1243
      raise errors.OpPrereqError("There are still %d instance(s) in"
1244
                                 " this cluster." % len(instancelist),
1245
                                 errors.ECODE_INVAL)
1246

    
1247
  def Exec(self, feedback_fn):
1248
    """Destroys the cluster.
1249

1250
    """
1251
    master = self.cfg.GetMasterNode()
1252
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1253

    
1254
    # Run post hooks on master node before it's removed
1255
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1256
    try:
1257
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1258
    except:
1259
      # pylint: disable-msg=W0702
1260
      self.LogWarning("Errors occurred running hooks on %s" % master)
1261

    
1262
    result = self.rpc.call_node_stop_master(master, False)
1263
    result.Raise("Could not disable the master role")
1264

    
1265
    if modify_ssh_setup:
1266
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1267
      utils.CreateBackup(priv_key)
1268
      utils.CreateBackup(pub_key)
1269

    
1270
    return master
1271

    
1272

    
1273
def _VerifyCertificate(filename):
1274
  """Verifies a certificate for LUVerifyCluster.
1275

1276
  @type filename: string
1277
  @param filename: Path to PEM file
1278

1279
  """
1280
  try:
1281
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1282
                                           utils.ReadFile(filename))
1283
  except Exception, err: # pylint: disable-msg=W0703
1284
    return (LUVerifyCluster.ETYPE_ERROR,
1285
            "Failed to load X509 certificate %s: %s" % (filename, err))
1286

    
1287
  (errcode, msg) = \
1288
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1289
                                constants.SSL_CERT_EXPIRATION_ERROR)
1290

    
1291
  if msg:
1292
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1293
  else:
1294
    fnamemsg = None
1295

    
1296
  if errcode is None:
1297
    return (None, fnamemsg)
1298
  elif errcode == utils.CERT_WARNING:
1299
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1300
  elif errcode == utils.CERT_ERROR:
1301
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1302

    
1303
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1304

    
1305

    
1306
class LUVerifyCluster(LogicalUnit):
1307
  """Verifies the cluster status.
1308

1309
  """
1310
  HPATH = "cluster-verify"
1311
  HTYPE = constants.HTYPE_CLUSTER
1312
  _OP_PARAMS = [
1313
    ("skip_checks", _EmptyList,
1314
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1315
    ("verbose", False, _TBool),
1316
    ("error_codes", False, _TBool),
1317
    ("debug_simulate_errors", False, _TBool),
1318
    ]
1319
  REQ_BGL = False
1320

    
1321
  TCLUSTER = "cluster"
1322
  TNODE = "node"
1323
  TINSTANCE = "instance"
1324

    
1325
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1326
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1327
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1328
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1329
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1330
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1332
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1333
  ENODEDRBD = (TNODE, "ENODEDRBD")
1334
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1335
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1336
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1337
  ENODEHV = (TNODE, "ENODEHV")
1338
  ENODELVM = (TNODE, "ENODELVM")
1339
  ENODEN1 = (TNODE, "ENODEN1")
1340
  ENODENET = (TNODE, "ENODENET")
1341
  ENODEOS = (TNODE, "ENODEOS")
1342
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1343
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1344
  ENODERPC = (TNODE, "ENODERPC")
1345
  ENODESSH = (TNODE, "ENODESSH")
1346
  ENODEVERSION = (TNODE, "ENODEVERSION")
1347
  ENODESETUP = (TNODE, "ENODESETUP")
1348
  ENODETIME = (TNODE, "ENODETIME")
1349

    
1350
  ETYPE_FIELD = "code"
1351
  ETYPE_ERROR = "ERROR"
1352
  ETYPE_WARNING = "WARNING"
1353

    
1354
  class NodeImage(object):
1355
    """A class representing the logical and physical status of a node.
1356

1357
    @type name: string
1358
    @ivar name: the node name to which this object refers
1359
    @ivar volumes: a structure as returned from
1360
        L{ganeti.backend.GetVolumeList} (runtime)
1361
    @ivar instances: a list of running instances (runtime)
1362
    @ivar pinst: list of configured primary instances (config)
1363
    @ivar sinst: list of configured secondary instances (config)
1364
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
1365
        of this node (config)
1366
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1367
    @ivar dfree: free disk, as reported by the node (runtime)
1368
    @ivar offline: the offline status (config)
1369
    @type rpc_fail: boolean
1370
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1371
        not whether the individual keys were correct) (runtime)
1372
    @type lvm_fail: boolean
1373
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1374
    @type hyp_fail: boolean
1375
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1376
    @type ghost: boolean
1377
    @ivar ghost: whether this is a known node or not (config)
1378
    @type os_fail: boolean
1379
    @ivar os_fail: whether the RPC call didn't return valid OS data
1380
    @type oslist: list
1381
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1382

1383
    """
1384
    def __init__(self, offline=False, name=None):
1385
      self.name = name
1386
      self.volumes = {}
1387
      self.instances = []
1388
      self.pinst = []
1389
      self.sinst = []
1390
      self.sbp = {}
1391
      self.mfree = 0
1392
      self.dfree = 0
1393
      self.offline = offline
1394
      self.rpc_fail = False
1395
      self.lvm_fail = False
1396
      self.hyp_fail = False
1397
      self.ghost = False
1398
      self.os_fail = False
1399
      self.oslist = {}
1400

    
1401
  def ExpandNames(self):
1402
    self.needed_locks = {
1403
      locking.LEVEL_NODE: locking.ALL_SET,
1404
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1405
    }
1406
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1407

    
1408
  def _Error(self, ecode, item, msg, *args, **kwargs):
1409
    """Format an error message.
1410

1411
    Based on the opcode's error_codes parameter, either format a
1412
    parseable error code, or a simpler error string.
1413

1414
    This must be called only from Exec and functions called from Exec.
1415

1416
    """
1417
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1418
    itype, etxt = ecode
1419
    # first complete the msg
1420
    if args:
1421
      msg = msg % args
1422
    # then format the whole message
1423
    if self.op.error_codes:
1424
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1425
    else:
1426
      if item:
1427
        item = " " + item
1428
      else:
1429
        item = ""
1430
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1431
    # and finally report it via the feedback_fn
1432
    self._feedback_fn("  - %s" % msg)
1433

    
1434
  def _ErrorIf(self, cond, *args, **kwargs):
1435
    """Log an error message if the passed condition is True.
1436

1437
    """
1438
    cond = bool(cond) or self.op.debug_simulate_errors
1439
    if cond:
1440
      self._Error(*args, **kwargs)
1441
    # mark the operation as failed only for ERROR cases, not for warnings
1442
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1443
      self.bad = self.bad or cond
1444

    
1445
  def _VerifyNode(self, ninfo, nresult):
1446
    """Perform some basic validation on data returned from a node.
1447

1448
      - check the result data structure is well formed and has all the
1449
        mandatory fields
1450
      - check ganeti version
1451

1452
    @type ninfo: L{objects.Node}
1453
    @param ninfo: the node to check
1454
    @param nresult: the results from the node
1455
    @rtype: boolean
1456
    @return: whether overall this call was successful (and we can expect
1457
         reasonable values in the response)
1458

1459
    """
1460
    node = ninfo.name
1461
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1462

    
1463
    # main result, nresult should be a non-empty dict
1464
    test = not nresult or not isinstance(nresult, dict)
1465
    _ErrorIf(test, self.ENODERPC, node,
1466
                  "unable to verify node: no data returned")
1467
    if test:
1468
      return False
1469

    
1470
    # compares ganeti version
1471
    local_version = constants.PROTOCOL_VERSION
1472
    remote_version = nresult.get("version", None)
1473
    test = not (remote_version and
1474
                isinstance(remote_version, (list, tuple)) and
1475
                len(remote_version) == 2)
1476
    _ErrorIf(test, self.ENODERPC, node,
1477
             "connection to node returned invalid data")
1478
    if test:
1479
      return False
1480

    
1481
    test = local_version != remote_version[0]
1482
    _ErrorIf(test, self.ENODEVERSION, node,
1483
             "incompatible protocol versions: master %s,"
1484
             " node %s", local_version, remote_version[0])
1485
    if test:
1486
      return False
1487

    
1488
    # node seems compatible, we can actually try to look into its results
1489

    
1490
    # full package version
1491
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1492
                  self.ENODEVERSION, node,
1493
                  "software version mismatch: master %s, node %s",
1494
                  constants.RELEASE_VERSION, remote_version[1],
1495
                  code=self.ETYPE_WARNING)
1496

    
1497
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1498
    if isinstance(hyp_result, dict):
1499
      for hv_name, hv_result in hyp_result.iteritems():
1500
        test = hv_result is not None
1501
        _ErrorIf(test, self.ENODEHV, node,
1502
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1503

    
1504

    
1505
    test = nresult.get(constants.NV_NODESETUP,
1506
                           ["Missing NODESETUP results"])
1507
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1508
             "; ".join(test))
1509

    
1510
    return True
1511

    
1512
  def _VerifyNodeTime(self, ninfo, nresult,
1513
                      nvinfo_starttime, nvinfo_endtime):
1514
    """Check the node time.
1515

1516
    @type ninfo: L{objects.Node}
1517
    @param ninfo: the node to check
1518
    @param nresult: the remote results for the node
1519
    @param nvinfo_starttime: the start time of the RPC call
1520
    @param nvinfo_endtime: the end time of the RPC call
1521

1522
    """
1523
    node = ninfo.name
1524
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1525

    
1526
    ntime = nresult.get(constants.NV_TIME, None)
1527
    try:
1528
      ntime_merged = utils.MergeTime(ntime)
1529
    except (ValueError, TypeError):
1530
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1531
      return
1532

    
1533
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1534
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1535
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1536
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1537
    else:
1538
      ntime_diff = None
1539

    
1540
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1541
             "Node time diverges by at least %s from master node time",
1542
             ntime_diff)
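  # Illustrative sketch, not part of the original module: a node whose merged
  # NV_TIME lies more than constants.NODE_MAX_CLOCK_SKEW seconds outside the
  # [nvinfo_starttime, nvinfo_endtime] window triggers ENODETIME with the
  # measured divergence formatted like "200.0s"; anything inside the window
  # passes silently.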
1543

    
1544
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

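  # Background for the PV name check in _VerifyNodeLVM above: lvcreate
  # interprets a trailing ":<start>[-<end>]" on a physical volume argument
  # as a physical extent range, e.g. (illustrative shell command only):
  #
  #   lvcreate -l 100 -n mylv myvg /dev/sdb1:0-499
  #
  # so a PV whose name itself contains ':' would be ambiguous on such a
  # command line, which is why it is flagged as an error here.
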
  def _VerifyNodeNetwork(self, ninfo, nresult):
1582
    """Check the node time.
1583

1584
    @type ninfo: L{objects.Node}
1585
    @param ninfo: the node to check
1586
    @param nresult: the remote results for the node
1587

1588
    """
1589
    node = ninfo.name
1590
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1591

    
1592
    test = constants.NV_NODELIST not in nresult
1593
    _ErrorIf(test, self.ENODESSH, node,
1594
             "node hasn't returned node ssh connectivity data")
1595
    if not test:
1596
      if nresult[constants.NV_NODELIST]:
1597
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1598
          _ErrorIf(True, self.ENODESSH, node,
1599
                   "ssh communication with node '%s': %s", a_node, a_msg)
1600

    
1601
    test = constants.NV_NODENETTEST not in nresult
1602
    _ErrorIf(test, self.ENODENET, node,
1603
             "node hasn't returned node tcp connectivity data")
1604
    if not test:
1605
      if nresult[constants.NV_NODENETTEST]:
1606
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1607
        for anode in nlist:
1608
          _ErrorIf(True, self.ENODENET, node,
1609
                   "tcp communication with node '%s': %s",
1610
                   anode, nresult[constants.NV_NODENETTEST][anode])
1611

    
1612
    test = constants.NV_MASTERIP not in nresult
1613
    _ErrorIf(test, self.ENODENET, node,
1614
             "node hasn't returned node master IP reachability data")
1615
    if not test:
1616
      if not nresult[constants.NV_MASTERIP]:
1617
        if node == self.master_node:
1618
          msg = "the master node cannot reach the master IP (not configured?)"
1619
        else:
1620
          msg = "cannot reach the master IP"
1621
        _ErrorIf(True, self.ENODENET, node, msg)
1622

    
1623

    
1624
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1625
    """Verify an instance.
1626

1627
    This function checks to see if the required block devices are
1628
    available on the instance's node.
1629

1630
    """
1631
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1632
    node_current = instanceconfig.primary_node
1633

    
1634
    node_vol_should = {}
1635
    instanceconfig.MapLVsByNode(node_vol_should)
1636

    
1637
    for node in node_vol_should:
1638
      n_img = node_image[node]
1639
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1640
        # ignore missing volumes on offline or broken nodes
1641
        continue
1642
      for volume in node_vol_should[node]:
1643
        test = volume not in n_img.volumes
1644
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1645
                 "volume %s missing on node %s", volume, node)
1646

    
1647
    if instanceconfig.admin_up:
1648
      pri_img = node_image[node_current]
1649
      test = instance not in pri_img.instances and not pri_img.offline
1650
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1651
               "instance not running on its primary node %s",
1652
               node_current)
1653

    
1654
    for node, n_img in node_image.items():
1655
      if node != node_current:
1656
        test = instance in n_img.instances
1657
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1658
                 "instance should not run on node %s", node)
1659

    
1660
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1661
    """Verify if there are any unknown volumes in the cluster.
1662

1663
    The .os, .swap and backup volumes are ignored. All other volumes are
1664
    reported as unknown.
1665

1666
    @type reserved: L{ganeti.utils.FieldSet}
1667
    @param reserved: a FieldSet of reserved volume names
1668

1669
    """
1670
    for node, n_img in node_image.items():
1671
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1672
        # skip non-healthy nodes
1673
        continue
1674
      for volume in n_img.volumes:
1675
        test = ((node not in node_vol_should or
1676
                volume not in node_vol_should[node]) and
1677
                not reserved.Matches(volume))
1678
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1679
                      "volume %s is unknown", volume)
1680

    
1681
  def _VerifyOrphanInstances(self, instancelist, node_image):
1682
    """Verify the list of running instances.
1683

1684
    This checks what instances are running but unknown to the cluster.
1685

1686
    """
1687
    for node, n_img in node_image.items():
1688
      for o_inst in n_img.instances:
1689
        test = o_inst not in instancelist
1690
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1691
                      "instance %s on node %s should not exist", o_inst, node)
1692

    
1693
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1694
    """Verify N+1 Memory Resilience.
1695

1696
    Check that if one single node dies we can still start all the
1697
    instances it was primary for.
1698

1699
    """
1700
    for node, n_img in node_image.items():
1701
      # This code checks that every node which is now listed as
1702
      # secondary has enough memory to host all instances it is
1703
      # supposed to, should a single other node in the cluster fail.
1704
      # FIXME: not ready for failover to an arbitrary node
1705
      # FIXME: does not support file-backed instances
1706
      # WARNING: we currently take into account down instances as well
1707
      # as up ones, considering that even if they're down someone
1708
      # might want to start them even in the event of a node failure.
1709
      for prinode, instances in n_img.sbp.items():
1710
        needed_mem = 0
1711
        for instance in instances:
1712
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1713
          if bep[constants.BE_AUTO_BALANCE]:
1714
            needed_mem += bep[constants.BE_MEMORY]
1715
        test = n_img.mfree < needed_mem
1716
        self._ErrorIf(test, self.ENODEN1, node,
1717
                      "not enough memory on to accommodate"
1718
                      " failovers should peer node %s fail", prinode)
1719

    
1720
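  # Worked example for the N+1 check in _VerifyNPlusOneMemory above (numbers
  # are invented): assume node "nodeC" has n_img.mfree = 4096 MiB and is the
  # secondary for two auto-balanced instances whose primary is "nodeA", with
  # BE_MEMORY values of 2048 and 3072 MiB.  If "nodeA" failed, "nodeC" would
  # need 2048 + 3072 = 5120 MiB, more than the 4096 MiB it has free, so
  # ENODEN1 is reported for "nodeC" with prinode = "nodeA".
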
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1721
                       master_files):
1722
    """Verifies and computes the node required file checksums.
1723

1724
    @type ninfo: L{objects.Node}
1725
    @param ninfo: the node to check
1726
    @param nresult: the remote results for the node
1727
    @param file_list: required list of files
1728
    @param local_cksum: dictionary of local files and their checksums
1729
    @param master_files: list of files that only masters should have
1730

1731
    """
1732
    node = ninfo.name
1733
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1734

    
1735
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1736
    test = not isinstance(remote_cksum, dict)
1737
    _ErrorIf(test, self.ENODEFILECHECK, node,
1738
             "node hasn't returned file checksum data")
1739
    if test:
1740
      return
1741

    
1742
    for file_name in file_list:
1743
      node_is_mc = ninfo.master_candidate
1744
      must_have = (file_name not in master_files) or node_is_mc
1745
      # missing
1746
      test1 = file_name not in remote_cksum
1747
      # invalid checksum
1748
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1749
      # existing and good
1750
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1751
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1752
               "file '%s' missing", file_name)
1753
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1754
               "file '%s' has wrong checksum", file_name)
1755
      # not candidate and this is not a must-have file
1756
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1757
               "file '%s' should not exist on non master"
1758
               " candidates (and the file is outdated)", file_name)
1759
      # all good, except non-master/non-must have combination
1760
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1761
               "file '%s' should not exist"
1762
               " on non master candidates", file_name)
1763

    
1764
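  # Summary of the per-file cases in _VerifyNodeFiles above (file name and
  # checksums are purely illustrative):
  #
  #   local_cksum  = {"/var/lib/example.conf": "abc123"}
  #   remote_cksum = {"/var/lib/example.conf": "def456"}
  #
  #   test1 (file missing)   -> error only if the node must have the file
  #   test2 (wrong checksum) -> error if the node must have the file,
  #                             otherwise "should not exist ... (and the
  #                             file is outdated)"
  #   test3 (checksum OK)    -> error only when the node should *not* have
  #                             the file (master file on a non candidate)
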
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1765
                      drbd_map):
1766
    """Verifies and the node DRBD status.
1767

1768
    @type ninfo: L{objects.Node}
1769
    @param ninfo: the node to check
1770
    @param nresult: the remote results for the node
1771
    @param instanceinfo: the dict of instances
1772
    @param drbd_helper: the configured DRBD usermode helper
1773
    @param drbd_map: the DRBD map as returned by
1774
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1775

1776
    """
1777
    node = ninfo.name
1778
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1779

    
1780
    if drbd_helper:
1781
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1782
      test = (helper_result is None)
1783
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1784
               "no drbd usermode helper returned")
1785
      if helper_result:
1786
        status, payload = helper_result
1787
        test = not status
1788
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1789
                 "drbd usermode helper check unsuccessful: %s", payload)
1790
        test = status and (payload != drbd_helper)
1791
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1792
                 "wrong drbd usermode helper: %s", payload)
1793

    
1794
    # compute the DRBD minors
1795
    node_drbd = {}
1796
    for minor, instance in drbd_map[node].items():
1797
      test = instance not in instanceinfo
1798
      _ErrorIf(test, self.ECLUSTERCFG, None,
1799
               "ghost instance '%s' in temporary DRBD map", instance)
1800
        # ghost instance should not be running, but otherwise we
1801
        # don't give double warnings (both ghost instance and
1802
        # unallocated minor in use)
1803
      if test:
1804
        node_drbd[minor] = (instance, False)
1805
      else:
1806
        instance = instanceinfo[instance]
1807
        node_drbd[minor] = (instance.name, instance.admin_up)
1808

    
1809
    # and now check them
1810
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1811
    test = not isinstance(used_minors, (tuple, list))
1812
    _ErrorIf(test, self.ENODEDRBD, node,
1813
             "cannot parse drbd status file: %s", str(used_minors))
1814
    if test:
1815
      # we cannot check drbd status
1816
      return
1817

    
1818
    for minor, (iname, must_exist) in node_drbd.items():
1819
      test = minor not in used_minors and must_exist
1820
      _ErrorIf(test, self.ENODEDRBD, node,
1821
               "drbd minor %d of instance %s is not active", minor, iname)
1822
    for minor in used_minors:
1823
      test = minor not in node_drbd
1824
      _ErrorIf(test, self.ENODEDRBD, node,
1825
               "unallocated drbd minor %d is in use", minor)
1826

    
1827
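  # Example of the two-way DRBD comparison in _VerifyNodeDrbd above (minors
  # and instance names are invented): with
  #
  #   node_drbd   = {0: ("inst1", True), 1: ("inst2", False)}
  #   used_minors = [1, 2]
  #
  # minor 0 should be active (inst1 is admin_up) but is missing from
  # used_minors -> "drbd minor 0 of instance inst1 is not active"; minor 2
  # is in use on the node but unknown to the configuration -> "unallocated
  # drbd minor 2 is in use".
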
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1828
    """Builds the node OS structures.
1829

1830
    @type ninfo: L{objects.Node}
1831
    @param ninfo: the node to check
1832
    @param nresult: the remote results for the node
1833
    @param nimg: the node image object
1834

1835
    """
1836
    node = ninfo.name
1837
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1838

    
1839
    remote_os = nresult.get(constants.NV_OSLIST, None)
1840
    test = (not isinstance(remote_os, list) or
1841
            not compat.all(isinstance(v, list) and len(v) == 7
1842
                           for v in remote_os))
1843

    
1844
    _ErrorIf(test, self.ENODEOS, node,
1845
             "node hasn't returned valid OS data")
1846

    
1847
    nimg.os_fail = test
1848

    
1849
    if test:
1850
      return
1851

    
1852
    os_dict = {}
1853

    
1854
    for (name, os_path, status, diagnose,
1855
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1856

    
1857
      if name not in os_dict:
1858
        os_dict[name] = []
1859

    
1860
      # parameters is a list of lists instead of list of tuples due to
1861
      # JSON lacking a real tuple type, fix it:
1862
      parameters = [tuple(v) for v in parameters]
1863
      os_dict[name].append((os_path, status, diagnose,
1864
                            set(variants), set(parameters), set(api_ver)))
1865

    
1866
    nimg.oslist = os_dict
1867

    
1868
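  # Shape of the data handled by _UpdateNodeOS above (the entry below is
  # invented for illustration): each element of nresult[constants.NV_OSLIST]
  # is a 7-element list
  #
  #   [name, os_path, status, diagnose, variants, parameters, api_versions]
  #   e.g. ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
  #         ["default"], [["ARG1", "help text"]], [15]]
  #
  # and entries are regrouped by OS name into nimg.oslist, so two
  # installations of the same OS on a node become two tuples under one key.
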
  def _VerifyNodeOS(self, ninfo, nimg, base):
1869
    """Verifies the node OS list.
1870

1871
    @type ninfo: L{objects.Node}
1872
    @param ninfo: the node to check
1873
    @param nimg: the node image object
1874
    @param base: the 'template' node we match against (e.g. from the master)
1875

1876
    """
1877
    node = ninfo.name
1878
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1879

    
1880
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1881

    
1882
    for os_name, os_data in nimg.oslist.items():
1883
      assert os_data, "Empty OS status for OS %s?!" % os_name
1884
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1885
      _ErrorIf(not f_status, self.ENODEOS, node,
1886
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1887
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1888
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1889
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1890
      # this will be caught in the backend too
1891
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1892
               and not f_var, self.ENODEOS, node,
1893
               "OS %s with API at least %d does not declare any variant",
1894
               os_name, constants.OS_API_V15)
1895
      # comparisons with the 'base' image
1896
      test = os_name not in base.oslist
1897
      _ErrorIf(test, self.ENODEOS, node,
1898
               "Extra OS %s not present on reference node (%s)",
1899
               os_name, base.name)
1900
      if test:
1901
        continue
1902
      assert base.oslist[os_name], "Base node has empty OS status?"
1903
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1904
      if not b_status:
1905
        # base OS is invalid, skipping
1906
        continue
1907
      for kind, a, b in [("API version", f_api, b_api),
1908
                         ("variants list", f_var, b_var),
1909
                         ("parameters", f_param, b_param)]:
1910
        _ErrorIf(a != b, self.ENODEOS, node,
1911
                 "OS %s %s differs from reference node %s: %s vs. %s",
1912
                 kind, os_name, base.name,
1913
                 utils.CommaJoin(a), utils.CommaJoin(b))
1914

    
1915
    # check any missing OSes
1916
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1917
    _ErrorIf(missing, self.ENODEOS, node,
1918
             "OSes present on reference node %s but missing on this node: %s",
1919
             base.name, utils.CommaJoin(missing))
1920

    
1921
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1922
    """Verifies and updates the node volume data.
1923

1924
    This function will update a L{NodeImage}'s internal structures
1925
    with data from the remote call.
1926

1927
    @type ninfo: L{objects.Node}
1928
    @param ninfo: the node to check
1929
    @param nresult: the remote results for the node
1930
    @param nimg: the node image object
1931
    @param vg_name: the configured VG name
1932

1933
    """
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1936

    
1937
    nimg.lvm_fail = True
1938
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1939
    if vg_name is None:
1940
      pass
1941
    elif isinstance(lvdata, basestring):
1942
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1943
               utils.SafeEncode(lvdata))
1944
    elif not isinstance(lvdata, dict):
1945
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1946
    else:
1947
      nimg.volumes = lvdata
1948
      nimg.lvm_fail = False
1949

    
1950
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1951
    """Verifies and updates the node instance list.
1952

1953
    If the listing was successful, then updates this node's instance
1954
    list. Otherwise, it marks the RPC call as failed for the instance
1955
    list key.
1956

1957
    @type ninfo: L{objects.Node}
1958
    @param ninfo: the node to check
1959
    @param nresult: the remote results for the node
1960
    @param nimg: the node image object
1961

1962
    """
1963
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1964
    test = not isinstance(idata, list)
1965
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1966
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1967
    if test:
1968
      nimg.hyp_fail = True
1969
    else:
1970
      nimg.instances = idata
1971

    
1972
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1973
    """Verifies and computes a node information map
1974

1975
    @type ninfo: L{objects.Node}
1976
    @param ninfo: the node to check
1977
    @param nresult: the remote results for the node
1978
    @param nimg: the node image object
1979
    @param vg_name: the configured VG name
1980

1981
    """
1982
    node = ninfo.name
1983
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1984

    
1985
    # try to read free memory (from the hypervisor)
1986
    hv_info = nresult.get(constants.NV_HVINFO, None)
1987
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1988
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1989
    if not test:
1990
      try:
1991
        nimg.mfree = int(hv_info["memory_free"])
1992
      except (ValueError, TypeError):
1993
        _ErrorIf(True, self.ENODERPC, node,
1994
                 "node returned invalid nodeinfo, check hypervisor")
1995

    
1996
    # FIXME: devise a free space model for file based instances as well
1997
    if vg_name is not None:
1998
      test = (constants.NV_VGLIST not in nresult or
1999
              vg_name not in nresult[constants.NV_VGLIST])
2000
      _ErrorIf(test, self.ENODELVM, node,
2001
               "node didn't return data for the volume group '%s'"
2002
               " - it is either missing or broken", vg_name)
2003
      if not test:
2004
        try:
2005
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2006
        except (ValueError, TypeError):
2007
          _ErrorIf(True, self.ENODERPC, node,
2008
                   "node returned invalid LVM info, check LVM status")
2009

    
2010
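  # Example of the payload consumed by _UpdateNodeInfo above (values are
  # invented): with vg_name = "xenvg" and
  #
  #   nresult[constants.NV_HVINFO] = {"memory_free": 7680}
  #   nresult[constants.NV_VGLIST] = {"xenvg": 102400}
  #
  # the node image ends up with nimg.mfree = 7680 (free hypervisor memory)
  # and nimg.dfree = 102400 (free space in the volume group); values that
  # cannot be converted to int are reported as ENODERPC errors instead.
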
  def BuildHooksEnv(self):
2011
    """Build hooks env.
2012

2013
    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.
2015

2016
    """
2017
    all_nodes = self.cfg.GetNodeList()
2018
    env = {
2019
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2020
      }
2021
    for node in self.cfg.GetAllNodesInfo().values():
2022
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2023

    
2024
    return env, [], all_nodes
2025

    
2026
  def Exec(self, feedback_fn):
2027
    """Verify integrity of cluster, performing various test on nodes.
2028

2029
    """
2030
    self.bad = False
2031
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2032
    verbose = self.op.verbose
2033
    self._feedback_fn = feedback_fn
2034
    feedback_fn("* Verifying global settings")
2035
    for msg in self.cfg.VerifyConfig():
2036
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2037

    
2038
    # Check the cluster certificates
2039
    for cert_filename in constants.ALL_CERT_FILES:
2040
      (errcode, msg) = _VerifyCertificate(cert_filename)
2041
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2042

    
2043
    vg_name = self.cfg.GetVGName()
2044
    drbd_helper = self.cfg.GetDRBDHelper()
2045
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2046
    cluster = self.cfg.GetClusterInfo()
2047
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2048
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2049
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2050
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2051
                        for iname in instancelist)
2052
    i_non_redundant = [] # Non redundant instances
2053
    i_non_a_balanced = [] # Non auto-balanced instances
2054
    n_offline = 0 # Count of offline nodes
2055
    n_drained = 0 # Count of nodes being drained
2056
    node_vol_should = {}
2057

    
2058
    # FIXME: verify OS list
2059
    # do local checksums
2060
    master_files = [constants.CLUSTER_CONF_FILE]
2061
    master_node = self.master_node = self.cfg.GetMasterNode()
2062
    master_ip = self.cfg.GetMasterIP()
2063

    
2064
    file_names = ssconf.SimpleStore().GetFileList()
2065
    file_names.extend(constants.ALL_CERT_FILES)
2066
    file_names.extend(master_files)
2067
    if cluster.modify_etc_hosts:
2068
      file_names.append(constants.ETC_HOSTS)
2069

    
2070
    local_checksums = utils.FingerprintFiles(file_names)
2071

    
2072
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2073
    node_verify_param = {
2074
      constants.NV_FILELIST: file_names,
2075
      constants.NV_NODELIST: [node.name for node in nodeinfo
2076
                              if not node.offline],
2077
      constants.NV_HYPERVISOR: hypervisors,
2078
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2079
                                  node.secondary_ip) for node in nodeinfo
2080
                                 if not node.offline],
2081
      constants.NV_INSTANCELIST: hypervisors,
2082
      constants.NV_VERSION: None,
2083
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2084
      constants.NV_NODESETUP: None,
2085
      constants.NV_TIME: None,
2086
      constants.NV_MASTERIP: (master_node, master_ip),
2087
      constants.NV_OSLIST: None,
2088
      }
2089

    
2090
    if vg_name is not None:
2091
      node_verify_param[constants.NV_VGLIST] = None
2092
      node_verify_param[constants.NV_LVLIST] = vg_name
2093
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2094
      node_verify_param[constants.NV_DRBDLIST] = None
2095

    
2096
    if drbd_helper:
2097
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2098

    
2099
    # Build our expected cluster state
2100
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2101
                                                 name=node.name))
2102
                      for node in nodeinfo)
2103

    
2104
    for instance in instancelist:
2105
      inst_config = instanceinfo[instance]
2106

    
2107
      for nname in inst_config.all_nodes:
2108
        if nname not in node_image:
2109
          # ghost node
2110
          gnode = self.NodeImage(name=nname)
2111
          gnode.ghost = True
2112
          node_image[nname] = gnode
2113

    
2114
      inst_config.MapLVsByNode(node_vol_should)
2115

    
2116
      pnode = inst_config.primary_node
2117
      node_image[pnode].pinst.append(instance)
2118

    
2119
      for snode in inst_config.secondary_nodes:
2120
        nimg = node_image[snode]
2121
        nimg.sinst.append(instance)
2122
        if pnode not in nimg.sbp:
2123
          nimg.sbp[pnode] = []
2124
        nimg.sbp[pnode].append(instance)
2125

    
2126
    # At this point, we have the in-memory data structures complete,
2127
    # except for the runtime information, which we'll gather next
2128

    
2129
    # Due to the way our RPC system works, exact response times cannot be
2130
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2131
    # time before and after executing the request, we can at least have a time
2132
    # window.
2133
    nvinfo_starttime = time.time()
2134
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2135
                                           self.cfg.GetClusterName())
2136
    nvinfo_endtime = time.time()
2137

    
2138
    all_drbd_map = self.cfg.ComputeDRBDMap()
2139

    
2140
    feedback_fn("* Verifying node status")
2141

    
2142
    refos_img = None
2143

    
2144
    for node_i in nodeinfo:
2145
      node = node_i.name
2146
      nimg = node_image[node]
2147

    
2148
      if node_i.offline:
2149
        if verbose:
2150
          feedback_fn("* Skipping offline node %s" % (node,))
2151
        n_offline += 1
2152
        continue
2153

    
2154
      if node == master_node:
2155
        ntype = "master"
2156
      elif node_i.master_candidate:
2157
        ntype = "master candidate"
2158
      elif node_i.drained:
2159
        ntype = "drained"
2160
        n_drained += 1
2161
      else:
2162
        ntype = "regular"
2163
      if verbose:
2164
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2165

    
2166
      msg = all_nvinfo[node].fail_msg
2167
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2168
      if msg:
2169
        nimg.rpc_fail = True
2170
        continue
2171

    
2172
      nresult = all_nvinfo[node].payload
2173

    
2174
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2175
      self._VerifyNodeNetwork(node_i, nresult)
2176
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2177
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2178
                            master_files)
2179
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2180
                           all_drbd_map)
2181
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2182

    
2183
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2184
      self._UpdateNodeInstances(node_i, nresult, nimg)
2185
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2186
      self._UpdateNodeOS(node_i, nresult, nimg)
2187
      if not nimg.os_fail:
2188
        if refos_img is None:
2189
          refos_img = nimg
2190
        self._VerifyNodeOS(node_i, nimg, refos_img)
2191

    
2192
    feedback_fn("* Verifying instance status")
2193
    for instance in instancelist:
2194
      if verbose:
2195
        feedback_fn("* Verifying instance %s" % instance)
2196
      inst_config = instanceinfo[instance]
2197
      self._VerifyInstance(instance, inst_config, node_image)
2198
      inst_nodes_offline = []
2199

    
2200
      pnode = inst_config.primary_node
2201
      pnode_img = node_image[pnode]
2202
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2203
               self.ENODERPC, pnode, "instance %s, connection to"
2204
               " primary node failed", instance)
2205

    
2206
      if pnode_img.offline:
2207
        inst_nodes_offline.append(pnode)
2208

    
2209
      # If the instance is non-redundant we cannot survive losing its primary
2210
      # node, so we are not N+1 compliant. On the other hand we have no disk
2211
      # templates with more than one secondary so that situation is not well
2212
      # supported either.
2213
      # FIXME: does not support file-backed instances
2214
      if not inst_config.secondary_nodes:
2215
        i_non_redundant.append(instance)
2216
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2217
               instance, "instance has multiple secondary nodes: %s",
2218
               utils.CommaJoin(inst_config.secondary_nodes),
2219
               code=self.ETYPE_WARNING)
2220

    
2221
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2222
        i_non_a_balanced.append(instance)
2223

    
2224
      for snode in inst_config.secondary_nodes:
2225
        s_img = node_image[snode]
2226
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2227
                 "instance %s, connection to secondary node failed", instance)
2228

    
2229
        if s_img.offline:
2230
          inst_nodes_offline.append(snode)
2231

    
2232
      # warn that the instance lives on offline nodes
2233
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2234
               "instance lives on offline node(s) %s",
2235
               utils.CommaJoin(inst_nodes_offline))
2236
      # ... or ghost nodes
2237
      for node in inst_config.all_nodes:
2238
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2239
                 "instance lives on ghost node %s", node)
2240

    
2241
    feedback_fn("* Verifying orphan volumes")
2242
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2243
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2244

    
2245
    feedback_fn("* Verifying orphan instances")
2246
    self._VerifyOrphanInstances(instancelist, node_image)
2247

    
2248
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2249
      feedback_fn("* Verifying N+1 Memory redundancy")
2250
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2251

    
2252
    feedback_fn("* Other Notes")
2253
    if i_non_redundant:
2254
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2255
                  % len(i_non_redundant))
2256

    
2257
    if i_non_a_balanced:
2258
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2259
                  % len(i_non_a_balanced))
2260

    
2261
    if n_offline:
2262
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2263

    
2264
    if n_drained:
2265
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2266

    
2267
    return not self.bad
2268

    
2269
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2270
    """Analyze the post-hooks' result
2271

2272
    This method analyses the hook result, handles it, and sends some
2273
    nicely-formatted feedback back to the user.
2274

2275
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2276
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2277
    @param hooks_results: the results of the multi-node hooks rpc call
2278
    @param feedback_fn: function used to send feedback back to the caller
2279
    @param lu_result: previous Exec result
2280
    @return: the new Exec result, based on the previous result
2281
        and hook results
2282

2283
    """
2284
    # We only really run POST phase hooks, and are only interested in
2285
    # their results
2286
    if phase == constants.HOOKS_PHASE_POST:
2287
      # Used to change hooks' output to proper indentation
2288
      indent_re = re.compile('^', re.M)
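      # indent_re matches the start of every line (re.M), so substituting
      # six spaces re-indents multi-line hook output, e.g. (illustrative):
      #   indent_re.sub('      ', "line1\nline2") == "      line1\n      line2"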
2289
      feedback_fn("* Hooks Results")
2290
      assert hooks_results, "invalid result from hooks"
2291

    
2292
      for node_name in hooks_results:
2293
        res = hooks_results[node_name]
2294
        msg = res.fail_msg
2295
        test = msg and not res.offline
2296
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2297
                      "Communication failure in hooks execution: %s", msg)
2298
        if res.offline or msg:
2299
          # No need to investigate payload if node is offline or gave an error.
2300
          # override manually lu_result here as _ErrorIf only
2301
          # overrides self.bad
2302
          lu_result = 1
2303
          continue
2304
        for script, hkr, output in res.payload:
2305
          test = hkr == constants.HKR_FAIL
2306
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2307
                        "Script %s failed, output:", script)
2308
          if test:
2309
            output = indent_re.sub('      ', output)
2310
            feedback_fn("%s" % output)
2311
            lu_result = 0
2312

    
2313
      return lu_result
2314

    
2315

    
2316
class LUVerifyDisks(NoHooksLU):
2317
  """Verifies the cluster disks status.
2318

2319
  """
2320
  REQ_BGL = False
2321

    
2322
  def ExpandNames(self):
2323
    self.needed_locks = {
2324
      locking.LEVEL_NODE: locking.ALL_SET,
2325
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2326
    }
2327
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2328

    
2329
  def Exec(self, feedback_fn):
2330
    """Verify integrity of cluster disks.
2331

2332
    @rtype: tuple of three items
2333
    @return: a tuple of (dict of node-to-node_error, list of instances
2334
        which need activate-disks, dict of instance: (node, volume) for
2335
        missing volumes)
2336

2337
    """
2338
    result = res_nodes, res_instances, res_missing = {}, [], {}
2339

    
2340
    vg_name = self.cfg.GetVGName()
2341
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2342
    instances = [self.cfg.GetInstanceInfo(name)
2343
                 for name in self.cfg.GetInstanceList()]
2344

    
2345
    nv_dict = {}
2346
    for inst in instances:
2347
      inst_lvs = {}
2348
      if (not inst.admin_up or
2349
          inst.disk_template not in constants.DTS_NET_MIRROR):
2350
        continue
2351
      inst.MapLVsByNode(inst_lvs)
2352
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2353
      for node, vol_list in inst_lvs.iteritems():
2354
        for vol in vol_list:
2355
          nv_dict[(node, vol)] = inst
2356

    
2357
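    # Example of the inversion performed above (names are invented): an
    # instance inst1 with inst_lvs = {"node1": ["xenvg/lv1", "xenvg/lv2"]}
    # adds {("node1", "xenvg/lv1"): inst1, ("node1", "xenvg/lv2"): inst1}
    # to nv_dict, so each (node, volume) pair can later be popped as soon
    # as that node reports the volume.
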
    if not nv_dict:
2358
      return result
2359

    
2360
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2361

    
2362
    for node in nodes:
2363
      # node_volume
2364
      node_res = node_lvs[node]
2365
      if node_res.offline:
2366
        continue
2367
      msg = node_res.fail_msg
2368
      if msg:
2369
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2370
        res_nodes[node] = msg
2371
        continue
2372

    
2373
      lvs = node_res.payload
2374
      for lv_name, (_, _, lv_online) in lvs.items():
2375
        inst = nv_dict.pop((node, lv_name), None)
2376
        if (not lv_online and inst is not None
2377
            and inst.name not in res_instances):
2378
          res_instances.append(inst.name)
2379

    
2380
    # any leftover items in nv_dict are missing LVs, let's arrange the
2381
    # data better
2382
    for key, inst in nv_dict.iteritems():
2383
      if inst.name not in res_missing:
2384
        res_missing[inst.name] = []
2385
      res_missing[inst.name].append(key)
2386

    
2387
    return result
2388

    
2389

    
2390
class LURepairDiskSizes(NoHooksLU):
2391
  """Verifies the cluster disks sizes.
2392

2393
  """
2394
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2395
  REQ_BGL = False
2396

    
2397
  def ExpandNames(self):
2398
    if self.op.instances:
2399
      self.wanted_names = []
2400
      for name in self.op.instances:
2401
        full_name = _ExpandInstanceName(self.cfg, name)
2402
        self.wanted_names.append(full_name)
2403
      self.needed_locks = {
2404
        locking.LEVEL_NODE: [],
2405
        locking.LEVEL_INSTANCE: self.wanted_names,
2406
        }
2407
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2408
    else:
2409
      self.wanted_names = None
2410
      self.needed_locks = {
2411
        locking.LEVEL_NODE: locking.ALL_SET,
2412
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2413
        }
2414
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2415

    
2416
  def DeclareLocks(self, level):
2417
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2418
      self._LockInstancesNodes(primary_only=True)
2419

    
2420
  def CheckPrereq(self):
2421
    """Check prerequisites.
2422

2423
    This only checks the optional instance list against the existing names.
2424

2425
    """
2426
    if self.wanted_names is None:
2427
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2428

    
2429
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2430
                             in self.wanted_names]
2431

    
2432
  def _EnsureChildSizes(self, disk):
2433
    """Ensure children of the disk have the needed disk size.
2434

2435
    This is valid mainly for DRBD8 and fixes an issue where the
2436
    children have smaller disk size.
2437

2438
    @param disk: an L{ganeti.objects.Disk} object
2439

2440
    """
2441
    if disk.dev_type == constants.LD_DRBD8:
2442
      assert disk.children, "Empty children for DRBD8?"
2443
      fchild = disk.children[0]
2444
      mismatch = fchild.size < disk.size
2445
      if mismatch:
2446
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2447
                     fchild.size, disk.size)
2448
        fchild.size = disk.size
2449

    
2450
      # and we recurse on this child only, not on the metadev
2451
      return self._EnsureChildSizes(fchild) or mismatch
2452
    else:
2453
      return False
2454

    
2455
  def Exec(self, feedback_fn):
2456
    """Verify the size of cluster disks.
2457

2458
    """
2459
    # TODO: check child disks too
2460
    # TODO: check differences in size between primary/secondary nodes
2461
    per_node_disks = {}
2462
    for instance in self.wanted_instances:
2463
      pnode = instance.primary_node
2464
      if pnode not in per_node_disks:
2465
        per_node_disks[pnode] = []
2466
      for idx, disk in enumerate(instance.disks):
2467
        per_node_disks[pnode].append((instance, idx, disk))
2468

    
2469
    changed = []
2470
    for node, dskl in per_node_disks.items():
2471
      newl = [v[2].Copy() for v in dskl]
2472
      for dsk in newl:
2473
        self.cfg.SetDiskID(dsk, node)
2474
      result = self.rpc.call_blockdev_getsizes(node, newl)
2475
      if result.fail_msg:
2476
        self.LogWarning("Failure in blockdev_getsizes call to node"
2477
                        " %s, ignoring", node)
2478
        continue
2479
      if len(result.data) != len(dskl):
2480
        self.LogWarning("Invalid result from node %s, ignoring node results",
2481
                        node)
2482
        continue
2483
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2484
        if size is None:
2485
          self.LogWarning("Disk %d of instance %s did not return size"
2486
                          " information, ignoring", idx, instance.name)
2487
          continue
2488
        if not isinstance(size, (int, long)):
2489
          self.LogWarning("Disk %d of instance %s did not return valid"
2490
                          " size information, ignoring", idx, instance.name)
2491
          continue
2492
        size = size >> 20
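        # The reported size is in bytes; shifting right by 20 bits divides
        # by 2**20 and yields MiB (e.g. 10737418240 >> 20 == 10240, so a
        # 10 GiB device becomes 10240 MiB, the unit used by disk.size).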
2493
        if size != disk.size:
2494
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2495
                       " correcting: recorded %d, actual %d", idx,
2496
                       instance.name, disk.size, size)
2497
          disk.size = size
2498
          self.cfg.Update(instance, feedback_fn)
2499
          changed.append((instance.name, idx, size))
2500
        if self._EnsureChildSizes(disk):
2501
          self.cfg.Update(instance, feedback_fn)
2502
          changed.append((instance.name, idx, disk.size))
2503
    return changed
2504

    
2505

    
2506
class LURenameCluster(LogicalUnit):
2507
  """Rename the cluster.
2508

2509
  """
2510
  HPATH = "cluster-rename"
2511
  HTYPE = constants.HTYPE_CLUSTER
2512
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2513

    
2514
  def BuildHooksEnv(self):
2515
    """Build hooks env.
2516

2517
    """
2518
    env = {
2519
      "OP_TARGET": self.cfg.GetClusterName(),
2520
      "NEW_NAME": self.op.name,
2521
      }
2522
    mn = self.cfg.GetMasterNode()
2523
    all_nodes = self.cfg.GetNodeList()
2524
    return env, [mn], all_nodes
2525

    
2526
  def CheckPrereq(self):
2527
    """Verify that the passed name is a valid one.
2528

2529
    """
2530
    hostname = netutils.GetHostInfo(self.op.name)
2531

    
2532
    new_name = hostname.name
2533
    self.ip = new_ip = hostname.ip
2534
    old_name = self.cfg.GetClusterName()
2535
    old_ip = self.cfg.GetMasterIP()
2536
    if new_name == old_name and new_ip == old_ip:
2537
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2538
                                 " cluster has changed",
2539
                                 errors.ECODE_INVAL)
2540
    if new_ip != old_ip:
2541
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2542
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2543
                                   " reachable on the network. Aborting." %
2544
                                   new_ip, errors.ECODE_NOTUNIQUE)
2545

    
2546
    self.op.name = new_name
2547

    
2548
  def Exec(self, feedback_fn):
2549
    """Rename the cluster.
2550

2551
    """
2552
    clustername = self.op.name
2553
    ip = self.ip
2554

    
2555
    # shutdown the master IP
2556
    master = self.cfg.GetMasterNode()
2557
    result = self.rpc.call_node_stop_master(master, False)
2558
    result.Raise("Could not disable the master role")
2559

    
2560
    try:
2561
      cluster = self.cfg.GetClusterInfo()
2562
      cluster.cluster_name = clustername
2563
      cluster.master_ip = ip
2564
      self.cfg.Update(cluster, feedback_fn)
2565

    
2566
      # update the known hosts file
2567
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2568
      node_list = self.cfg.GetNodeList()
2569
      try:
2570
        node_list.remove(master)
2571
      except ValueError:
2572
        pass
2573
      result = self.rpc.call_upload_file(node_list,
2574
                                         constants.SSH_KNOWN_HOSTS_FILE)
2575
      for to_node, to_result in result.iteritems():
2576
        msg = to_result.fail_msg
2577
        if msg:
2578
          msg = ("Copy of file %s to node %s failed: %s" %
2579
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2580
          self.proc.LogWarning(msg)
2581

    
2582
    finally:
2583
      result = self.rpc.call_node_start_master(master, False, False)
2584
      msg = result.fail_msg
2585
      if msg:
2586
        self.LogWarning("Could not re-enable the master role on"
2587
                        " the master, please restart manually: %s", msg)
2588

    
2589
    return clustername
2590

    
2591

    
2592
class LUSetClusterParams(LogicalUnit):
2593
  """Change the parameters of the cluster.
2594

2595
  """
2596
  HPATH = "cluster-modify"
2597
  HTYPE = constants.HTYPE_CLUSTER
2598
  _OP_PARAMS = [
2599
    ("vg_name", None, _TMaybeString),
2600
    ("enabled_hypervisors", None,
2601
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2602
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2603
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2604
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2605
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2606
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2607
    ("uid_pool", None, _NoType),
2608
    ("add_uids", None, _NoType),
2609
    ("remove_uids", None, _NoType),
2610
    ("maintain_node_health", None, _TMaybeBool),
2611
    ("nicparams", None, _TOr(_TDict, _TNone)),
2612
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2613
    ("default_iallocator", None, _TMaybeString),
2614
    ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2615
    ]
2616
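  # How these parameter specs read (example values are illustrative): each
  # triple is (name, default, type check).  For instance
  #
  #   ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone))
  #
  # accepts either None (meaning "leave unchanged") or a list of non-empty
  # strings such as ["xenvg/gnt-reserved"]; a bare string or a list that
  # contains an empty string fails the opcode parameter validation.
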
  REQ_BGL = False
2617

    
2618
  def CheckArguments(self):
2619
    """Check parameters
2620

2621
    """
2622
    if self.op.uid_pool:
2623
      uidpool.CheckUidPool(self.op.uid_pool)
2624

    
2625
    if self.op.add_uids:
2626
      uidpool.CheckUidPool(self.op.add_uids)
2627

    
2628
    if self.op.remove_uids:
2629
      uidpool.CheckUidPool(self.op.remove_uids)
2630

    
2631
  def ExpandNames(self):
2632
    # FIXME: in the future maybe other cluster params won't require checking on
2633
    # all nodes to be modified.
2634
    self.needed_locks = {
2635
      locking.LEVEL_NODE: locking.ALL_SET,
2636
    }
2637
    self.share_locks[locking.LEVEL_NODE] = 1
2638

    
2639
  def BuildHooksEnv(self):
2640
    """Build hooks env.
2641

2642
    """
2643
    env = {
2644
      "OP_TARGET": self.cfg.GetClusterName(),
2645
      "NEW_VG_NAME": self.op.vg_name,
2646
      }
2647
    mn = self.cfg.GetMasterNode()
2648
    return env, [mn], [mn]
2649

    
2650
  def CheckPrereq(self):
2651
    """Check prerequisites.
2652

2653
    This checks whether the given params don't conflict and
2654
    if the given volume group is valid.
2655

2656
    """
2657
    if self.op.vg_name is not None and not self.op.vg_name:
2658
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2659
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2660
                                   " instances exist", errors.ECODE_INVAL)
2661

    
2662
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2663
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2664
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2665
                                   " drbd-based instances exist",
2666
                                   errors.ECODE_INVAL)
2667

    
2668
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2669

    
2670
    # if vg_name not None, checks given volume group on all nodes
2671
    if self.op.vg_name:
2672
      vglist = self.rpc.call_vg_list(node_list)
2673
      for node in node_list:
2674
        msg = vglist[node].fail_msg
2675
        if msg:
2676
          # ignoring down node
2677
          self.LogWarning("Error while gathering data on node %s"
2678
                          " (ignoring node): %s", node, msg)
2679
          continue
2680
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2681
                                              self.op.vg_name,
2682
                                              constants.MIN_VG_SIZE)
2683
        if vgstatus:
2684
          raise errors.OpPrereqError("Error on node '%s': %s" %
2685
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2686

    
2687
    if self.op.drbd_helper:
2688
      # checks given drbd helper on all nodes
2689
      helpers = self.rpc.call_drbd_helper(node_list)
2690
      for node in node_list:
2691
        ninfo = self.cfg.GetNodeInfo(node)
2692
        if ninfo.offline:
2693
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2694
          continue
2695
        msg = helpers[node].fail_msg
2696
        if msg:
2697
          raise errors.OpPrereqError("Error checking drbd helper on node"
2698
                                     " '%s': %s" % (node, msg),
2699
                                     errors.ECODE_ENVIRON)
2700
        node_helper = helpers[node].payload
2701
        if node_helper != self.op.drbd_helper:
2702
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2703
                                     (node, node_helper), errors.ECODE_ENVIRON)
2704

    
2705
    self.cluster = cluster = self.cfg.GetClusterInfo()
2706
    # validate params changes
2707
    if self.op.beparams:
2708
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2709
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2710

    
2711
    if self.op.nicparams:
2712
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2713
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2714
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2715
      nic_errors = []
2716

    
2717
      # check all instances for consistency
2718
      for instance in self.cfg.GetAllInstancesInfo().values():
2719
        for nic_idx, nic in enumerate(instance.nics):
2720
          params_copy = copy.deepcopy(nic.nicparams)
2721
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2722

    
2723
          # check parameter syntax
2724
          try:
2725
            objects.NIC.CheckParameterSyntax(params_filled)
2726
          except errors.ConfigurationError, err:
2727
            nic_errors.append("Instance %s, nic/%d: %s" %
2728
                              (instance.name, nic_idx, err))
2729

    
2730
          # if we're moving instances to routed, check that they have an ip
2731
          target_mode = params_filled[constants.NIC_MODE]
2732
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2733
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2734
                              (instance.name, nic_idx))
2735
      if nic_errors:
2736
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2737
                                   "\n".join(nic_errors))
2738

    
2739
    # hypervisor list/parameters
2740
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2741
    if self.op.hvparams:
2742
      for hv_name, hv_dict in self.op.hvparams.items():
2743
        if hv_name not in self.new_hvparams:
2744
          self.new_hvparams[hv_name] = hv_dict
2745
        else:
2746
          self.new_hvparams[hv_name].update(hv_dict)
2747

    
2748
    # os hypervisor parameters
2749
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2750
    if self.op.os_hvp:
2751
      for os_name, hvs in self.op.os_hvp.items():
2752
        if os_name not in self.new_os_hvp:
2753
          self.new_os_hvp[os_name] = hvs
2754
        else:
2755
          for hv_name, hv_dict in hvs.items():
2756
            if hv_name not in self.new_os_hvp[os_name]:
2757
              self.new_os_hvp[os_name][hv_name] = hv_dict
2758
            else:
2759
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2760

    
2761
    # os parameters
2762
    self.new_osp = objects.FillDict(cluster.osparams, {})
2763
    if self.op.osparams:
2764
      for os_name, osp in self.op.osparams.items():
2765
        if os_name not in self.new_osp:
2766
          self.new_osp[os_name] = {}
2767

    
2768
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2769
                                                  use_none=True)
2770

    
2771
        if not self.new_osp[os_name]:
2772
          # we removed all parameters
2773
          del self.new_osp[os_name]
2774
        else:
2775
          # check the parameter validity (remote check)
2776
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2777
                         os_name, self.new_osp[os_name])
2778

    
2779
    # changes to the hypervisor list
2780
    if self.op.enabled_hypervisors is not None:
2781
      self.hv_list = self.op.enabled_hypervisors
2782
      for hv in self.hv_list:
2783
        # if the hypervisor doesn't already exist in the cluster
2784
        # hvparams, we initialize it to empty, and then (in both
2785
        # cases) we make sure to fill the defaults, as we might not
2786
        # have a complete defaults list if the hypervisor wasn't
2787
        # enabled before
2788
        if hv not in new_hvp:
2789
          new_hvp[hv] = {}
2790
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2791
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2792
    else:
2793
      self.hv_list = cluster.enabled_hypervisors
2794

    
2795
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2796
      # either the enabled list has changed, or the parameters have, validate
2797
      for hv_name, hv_params in self.new_hvparams.items():
2798
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2799
            (self.op.enabled_hypervisors and
2800
             hv_name in self.op.enabled_hypervisors)):
2801
          # either this is a new hypervisor, or its parameters have changed
2802
          hv_class = hypervisor.GetHypervisor(hv_name)
2803
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


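# A minimal, illustrative sketch of how a logical unit is expected to invoke
# _RedistributeAncillaryFiles above (mirroring the calls made by
# LURedistributeConfig and LUAddNode below); it is not an additional code path
# of this module, and the node name used is purely hypothetical:
#
#   def Exec(self, feedback_fn):
#     # push /etc/hosts, SSH known_hosts, RAPI certificates, etc. to all
#     # online nodes plus one node not yet present in the configuration
#     _RedistributeAncillaryFiles(self, additional_nodes=["node4.example.com"])
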
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


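# Illustrative sketch for _WaitForSync above (not an additional code path):
# callers treat a False return value as "disks still degraded", e.g.:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Some disks of the instance are degraded!")
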
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


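# Illustrative sketch for _CheckDiskConsistency above, assuming the usual
# pattern of checking the local disk status (ldisk=True) on a target node
# before acting on a DRBD-backed instance; not an additional code path here:
#
#   for dev in instance.disks:
#     if not _CheckDiskConsistency(self, dev, target_node, False, ldisk=True):
#       raise errors.OpExecError("Disk %s is degraded on the target node,"
#                                " aborting" % dev.iv_name)
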
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


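# Illustrative sketch of walking the structure returned by
# LUDiagnoseOS._DiagnoseByOS above (per-OS, per-node lists of
# (path, status, diagnose, variants, params, api_versions) tuples);
# not an additional code path in this module:
#
#   for os_name, per_node in pol.items():
#     for node_name, entries in per_node.items():
#       for (path, status, diagnose, _, _, _) in entries:
#         if not status:
#           logging.warning("OS %s is invalid on %s (%s): %s",
#                           os_name, node_name, path, diagnose)
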
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("name", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("changes", _NoDefault, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, _NoType),
    ("secondary_ip", None, _TMaybeString),
    ("readd", False, _TBool),
    ]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = netutils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      self.op.secondary_ip = primary_ip
    if not netutils.IsValidIP4(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, _TMaybeBool),
    ("offline", None, _TMaybeBool),
    ("drained", None, _TMaybeBool),
    ("auto_promote", False, _TBool),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      }

    return result


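# Illustrative sketch of a client-side consumer of the dictionary built by
# LUQueryClusterInfo.Exec above (assumption: "cl" is a luxi or RAPI client
# object offering QueryClusterInfo()); not an additional code path here:
#
#   info = cl.QueryClusterInfo()
#   print "Cluster %s (master %s)" % (info["name"], info["master"])
#   print "Enabled hypervisors: %s" % ", ".join(info["enabled_hypervisors"])
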
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
4327
  """Start the disks of an instance.
4328

4329
  """
4330
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4331
                                           ignore_secondaries=force)
4332
  if not disks_ok:
4333
    _ShutdownInstanceDisks(lu, instance)
4334
    if force is not None and not force:
4335
      lu.proc.LogWarning("", hint="If the message above refers to a"
4336
                         " secondary node,"
4337
                         " you can retry the operation using '--force'.")
4338
    raise errors.OpExecError("Disk consistency error")
4339

    
4340

    
4341
class LUDeactivateInstanceDisks(NoHooksLU):
4342
  """Shutdown an instance's disks.
4343

4344
  """
4345
  _OP_PARAMS = [
4346
    _PInstanceName,
4347
    ]
4348
  REQ_BGL = False
4349

    
4350
  def ExpandNames(self):
4351
    self._ExpandAndLockInstance()
4352
    self.needed_locks[locking.LEVEL_NODE] = []
4353
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4354

    
4355
  def DeclareLocks(self, level):
4356
    if level == locking.LEVEL_NODE:
4357
      self._LockInstancesNodes()
4358

    
4359
  def CheckPrereq(self):
4360
    """Check prerequisites.
4361

4362
    This checks that the instance is in the cluster.
4363

4364
    """
4365
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4366
    assert self.instance is not None, \
4367
      "Cannot retrieve locked instance %s" % self.op.instance_name
4368

    
4369
  def Exec(self, feedback_fn):
4370
    """Deactivate the disks
4371

4372
    """
4373
    instance = self.instance
4374
    _SafeShutdownInstanceDisks(self, instance)
4375

    
4376

    
4377
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4378
  """Shutdown block devices of an instance.
4379

4380
  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.
4382

4383
  """
4384
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4385
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4386

    
4387

    
4388
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
4397
  if disks is None:
4398
    return instance.disks
4399
  else:
4400
    if not set(disks).issubset(instance.disks):
4401
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4402
                                   " target instance")
4403
    return disks
4404

    
4405

    
4406
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if ignore_primary is
  true; with the default (false), a primary-node error makes the
  function return failure.

  """
4415
  all_result = True
4416
  disks = _ExpandCheckDisks(instance, disks)
4417

    
4418
  for disk in disks:
4419
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4420
      lu.cfg.SetDiskID(top_disk, node)
4421
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4422
      msg = result.fail_msg
4423
      if msg:
4424
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4425
                      disk.iv_name, node, msg)
4426
        if not ignore_primary or node != instance.primary_node:
4427
          all_result = False
4428
  return all_result
4429

    
4430

    
4431
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4432
  """Checks if a node has enough free memory.
4433

4434
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4438

4439
  @type lu: C{LogicalUnit}
4440
  @param lu: a logical unit from which we get configuration data
4441
  @type node: C{str}
4442
  @param node: the node to check
4443
  @type reason: C{str}
4444
  @param reason: string to use in the error message
4445
  @type requested: C{int}
4446
  @param requested: the amount of memory in MiB to check for
4447
  @type hypervisor_name: C{str}
4448
  @param hypervisor_name: the hypervisor to ask for memory stats
4449
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4450
      we cannot check the node
4451

4452
  """
4453
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4454
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4455
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4456
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4457
  if not isinstance(free_mem, int):
4458
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4459
                               " was '%s'" % (node, free_mem),
4460
                               errors.ECODE_ENVIRON)
4461
  if requested > free_mem:
4462
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4463
                               " needed %s MiB, available %s MiB" %
4464
                               (node, reason, requested, free_mem),
4465
                               errors.ECODE_NORES)
4466

    
4467

    
4468
def _CheckNodesFreeDisk(lu, nodenames, requested):
4469
  """Checks if nodes have enough free disk space in the default VG.
4470

4471
  This function checks if all given nodes have the needed amount of
  free disk space. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4475

4476
  @type lu: C{LogicalUnit}
4477
  @param lu: a logical unit from which we get configuration data
4478
  @type nodenames: C{list}
4479
  @param nodenames: the list of node names to check
4480
  @type requested: C{int}
4481
  @param requested: the amount of disk in MiB to check for
4482
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4483
      we cannot check the node
4484

4485
  """
4486
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4487
                                   lu.cfg.GetHypervisorType())
4488
  for node in nodenames:
4489
    info = nodeinfo[node]
4490
    info.Raise("Cannot get current information from node %s" % node,
4491
               prereq=True, ecode=errors.ECODE_ENVIRON)
4492
    vg_free = info.payload.get("vg_free", None)
4493
    if not isinstance(vg_free, int):
4494
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4495
                                 " result was '%s'" % (node, vg_free),
4496
                                 errors.ECODE_ENVIRON)
4497
    if requested > vg_free:
4498
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4499
                                 " required %d MiB, available %d MiB" %
4500
                                 (node, requested, vg_free),
4501
                                 errors.ECODE_NORES)
4502

    
4503

    
4504
class LUStartupInstance(LogicalUnit):
4505
  """Starts an instance.
4506

4507
  """
4508
  HPATH = "instance-start"
4509
  HTYPE = constants.HTYPE_INSTANCE
4510
  _OP_PARAMS = [
4511
    _PInstanceName,
4512
    _PForce,
4513
    ("hvparams", _EmptyDict, _TDict),
4514
    ("beparams", _EmptyDict, _TDict),
4515
    ]
4516
  REQ_BGL = False
4517

    
4518
  def CheckArguments(self):
4519
    # extra beparams
4520
    if self.op.beparams:
4521
      # fill the beparams dict
4522
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4523

    
4524
  def ExpandNames(self):
4525
    self._ExpandAndLockInstance()
4526

    
4527
  def BuildHooksEnv(self):
4528
    """Build hooks env.
4529

4530
    This runs on master, primary and secondary nodes of the instance.
4531

4532
    """
4533
    env = {
4534
      "FORCE": self.op.force,
4535
      }
4536
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4537
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4538
    return env, nl, nl
4539

    
4540
  def CheckPrereq(self):
4541
    """Check prerequisites.
4542

4543
    This checks that the instance is in the cluster.
4544

4545
    """
4546
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4547
    assert self.instance is not None, \
4548
      "Cannot retrieve locked instance %s" % self.op.instance_name
4549

    
4550
    # extra hvparams
4551
    if self.op.hvparams:
4552
      # check hypervisor parameter syntax (locally)
4553
      cluster = self.cfg.GetClusterInfo()
4554
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4555
      filled_hvp = cluster.FillHV(instance)
4556
      filled_hvp.update(self.op.hvparams)
4557
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4558
      hv_type.CheckParameterSyntax(filled_hvp)
4559
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
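      # The effective hypervisor parameters are therefore the cluster/OS
      # defaults, overridden by the instance's own hvparams, overridden in
      # turn by the one-off hvparams passed in this opcode; the overrides
      # are only used for this start and are not written back to the
      # configuration by this LU.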
4560

    
4561
    _CheckNodeOnline(self, instance.primary_node)
4562

    
4563
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4564
    # check bridges existence
4565
    _CheckInstanceBridgesExist(self, instance)
4566

    
4567
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4568
                                              instance.name,
4569
                                              instance.hypervisor)
4570
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4571
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4572
    if not remote_info.payload: # not running already
4573
      _CheckNodeFreeMemory(self, instance.primary_node,
4574
                           "starting instance %s" % instance.name,
4575
                           bep[constants.BE_MEMORY], instance.hypervisor)
4576

    
4577
  def Exec(self, feedback_fn):
4578
    """Start the instance.
4579

4580
    """
4581
    instance = self.instance
4582
    force = self.op.force
4583

    
4584
    self.cfg.MarkInstanceUp(instance.name)
4585

    
4586
    node_current = instance.primary_node
4587

    
4588
    _StartInstanceDisks(self, instance, force)
4589

    
4590
    result = self.rpc.call_instance_start(node_current, instance,
4591
                                          self.op.hvparams, self.op.beparams)
4592
    msg = result.fail_msg
4593
    if msg:
4594
      _ShutdownInstanceDisks(self, instance)
4595
      raise errors.OpExecError("Could not start instance: %s" % msg)
4596

    
4597

    
4598
class LURebootInstance(LogicalUnit):
4599
  """Reboot an instance.
4600

4601
  """
4602
  HPATH = "instance-reboot"
4603
  HTYPE = constants.HTYPE_INSTANCE
4604
  _OP_PARAMS = [
4605
    _PInstanceName,
4606
    ("ignore_secondaries", False, _TBool),
4607
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4608
    _PShutdownTimeout,
4609
    ]
4610
  REQ_BGL = False
4611

    
4612
  def ExpandNames(self):
4613
    self._ExpandAndLockInstance()
4614

    
4615
  def BuildHooksEnv(self):
4616
    """Build hooks env.
4617

4618
    This runs on master, primary and secondary nodes of the instance.
4619

4620
    """
4621
    env = {
4622
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4623
      "REBOOT_TYPE": self.op.reboot_type,
4624
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4625
      }
4626
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4627
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4628
    return env, nl, nl
4629

    
4630
  def CheckPrereq(self):
4631
    """Check prerequisites.
4632

4633
    This checks that the instance is in the cluster.
4634

4635
    """
4636
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4637
    assert self.instance is not None, \
4638
      "Cannot retrieve locked instance %s" % self.op.instance_name
4639

    
4640
    _CheckNodeOnline(self, instance.primary_node)
4641

    
4642
    # check bridges existence
4643
    _CheckInstanceBridgesExist(self, instance)
4644

    
4645
  def Exec(self, feedback_fn):
4646
    """Reboot the instance.
4647

4648
    """
4649
    instance = self.instance
4650
    ignore_secondaries = self.op.ignore_secondaries
4651
    reboot_type = self.op.reboot_type
4652

    
4653
    node_current = instance.primary_node
4654

    
4655
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4656
                       constants.INSTANCE_REBOOT_HARD]:
4657
      for disk in instance.disks:
4658
        self.cfg.SetDiskID(disk, node_current)
4659
      result = self.rpc.call_instance_reboot(node_current, instance,
4660
                                             reboot_type,
4661
                                             self.op.shutdown_timeout)
4662
      result.Raise("Could not reboot instance")
4663
    else:
4664
      result = self.rpc.call_instance_shutdown(node_current, instance,
4665
                                               self.op.shutdown_timeout)
4666
      result.Raise("Could not shutdown instance for full reboot")
4667
      _ShutdownInstanceDisks(self, instance)
4668
      _StartInstanceDisks(self, instance, ignore_secondaries)
4669
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4670
      msg = result.fail_msg
4671
      if msg:
4672
        _ShutdownInstanceDisks(self, instance)
4673
        raise errors.OpExecError("Could not start instance for"
4674
                                 " full reboot: %s" % msg)
4675

    
4676
    self.cfg.MarkInstanceUp(instance.name)
4677

    
4678

    
4679
class LUShutdownInstance(LogicalUnit):
4680
  """Shutdown an instance.
4681

4682
  """
4683
  HPATH = "instance-stop"
4684
  HTYPE = constants.HTYPE_INSTANCE
4685
  _OP_PARAMS = [
4686
    _PInstanceName,
4687
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4688
    ]
4689
  REQ_BGL = False
4690

    
4691
  def ExpandNames(self):
4692
    self._ExpandAndLockInstance()
4693

    
4694
  def BuildHooksEnv(self):
4695
    """Build hooks env.
4696

4697
    This runs on master, primary and secondary nodes of the instance.
4698

4699
    """
4700
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4701
    env["TIMEOUT"] = self.op.timeout
4702
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4703
    return env, nl, nl
4704

    
4705
  def CheckPrereq(self):
4706
    """Check prerequisites.
4707

4708
    This checks that the instance is in the cluster.
4709

4710
    """
4711
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4712
    assert self.instance is not None, \
4713
      "Cannot retrieve locked instance %s" % self.op.instance_name
4714
    _CheckNodeOnline(self, self.instance.primary_node)
4715

    
4716
  def Exec(self, feedback_fn):
4717
    """Shutdown the instance.
4718

4719
    """
4720
    instance = self.instance
4721
    node_current = instance.primary_node
4722
    timeout = self.op.timeout
4723
    self.cfg.MarkInstanceDown(instance.name)
4724
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4725
    msg = result.fail_msg
4726
    if msg:
4727
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4728

    
4729
    _ShutdownInstanceDisks(self, instance)
4730

    
4731

    
4732
class LUReinstallInstance(LogicalUnit):
4733
  """Reinstall an instance.
4734

4735
  """
4736
  HPATH = "instance-reinstall"
4737
  HTYPE = constants.HTYPE_INSTANCE
4738
  _OP_PARAMS = [
4739
    _PInstanceName,
4740
    ("os_type", None, _TMaybeString),
4741
    ("force_variant", False, _TBool),
4742
    ]
4743
  REQ_BGL = False
4744

    
4745
  def ExpandNames(self):
4746
    self._ExpandAndLockInstance()
4747

    
4748
  def BuildHooksEnv(self):
4749
    """Build hooks env.
4750

4751
    This runs on master, primary and secondary nodes of the instance.
4752

4753
    """
4754
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4755
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4756
    return env, nl, nl
4757

    
4758
  def CheckPrereq(self):
4759
    """Check prerequisites.
4760

4761
    This checks that the instance is in the cluster and is not running.
4762

4763
    """
4764
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4765
    assert instance is not None, \
4766
      "Cannot retrieve locked instance %s" % self.op.instance_name
4767
    _CheckNodeOnline(self, instance.primary_node)
4768

    
4769
    if instance.disk_template == constants.DT_DISKLESS:
4770
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4771
                                 self.op.instance_name,
4772
                                 errors.ECODE_INVAL)
4773
    _CheckInstanceDown(self, instance, "cannot reinstall")
4774

    
4775
    if self.op.os_type is not None:
4776
      # OS verification
4777
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4778
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4779

    
4780
    self.instance = instance
4781

    
4782
  def Exec(self, feedback_fn):
4783
    """Reinstall the instance.
4784

4785
    """
4786
    inst = self.instance
4787

    
4788
    if self.op.os_type is not None:
4789
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4790
      inst.os = self.op.os_type
4791
      self.cfg.Update(inst, feedback_fn)
4792

    
4793
    _StartInstanceDisks(self, inst, None)
4794
    try:
4795
      feedback_fn("Running the instance OS create scripts...")
4796
      # FIXME: pass debug option from opcode to backend
4797
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4798
                                             self.op.debug_level)
4799
      result.Raise("Could not install OS for instance %s on node %s" %
4800
                   (inst.name, inst.primary_node))
4801
    finally:
4802
      _ShutdownInstanceDisks(self, inst)
4803

    
4804

    
4805
class LURecreateInstanceDisks(LogicalUnit):
4806
  """Recreate an instance's missing disks.
4807

4808
  """
4809
  HPATH = "instance-recreate-disks"
4810
  HTYPE = constants.HTYPE_INSTANCE
4811
  _OP_PARAMS = [
4812
    _PInstanceName,
4813
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4814
    ]
4815
  REQ_BGL = False
4816

    
4817
  def ExpandNames(self):
4818
    self._ExpandAndLockInstance()
4819

    
4820
  def BuildHooksEnv(self):
4821
    """Build hooks env.
4822

4823
    This runs on master, primary and secondary nodes of the instance.
4824

4825
    """
4826
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4827
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4828
    return env, nl, nl
4829

    
4830
  def CheckPrereq(self):
4831
    """Check prerequisites.
4832

4833
    This checks that the instance is in the cluster and is not running.
4834

4835
    """
4836
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4837
    assert instance is not None, \
4838
      "Cannot retrieve locked instance %s" % self.op.instance_name
4839
    _CheckNodeOnline(self, instance.primary_node)
4840

    
4841
    if instance.disk_template == constants.DT_DISKLESS:
4842
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4843
                                 self.op.instance_name, errors.ECODE_INVAL)
4844
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4845

    
4846
    if not self.op.disks:
4847
      self.op.disks = range(len(instance.disks))
4848
    else:
4849
      for idx in self.op.disks:
4850
        if idx >= len(instance.disks):
4851
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4852
                                     errors.ECODE_INVAL)
4853

    
4854
    self.instance = instance
4855

    
4856
  def Exec(self, feedback_fn):
4857
    """Recreate the disks.
4858

4859
    """
4860
    to_skip = []
4861
    for idx, _ in enumerate(self.instance.disks):
4862
      if idx not in self.op.disks: # disk idx has not been passed in
4863
        to_skip.append(idx)
4864
        continue
4865

    
4866
    _CreateDisks(self, self.instance, to_skip=to_skip)
4867

    
4868

    
4869
class LURenameInstance(LogicalUnit):
4870
  """Rename an instance.
4871

4872
  """
4873
  HPATH = "instance-rename"
4874
  HTYPE = constants.HTYPE_INSTANCE
4875
  _OP_PARAMS = [
4876
    _PInstanceName,
4877
    ("new_name", _NoDefault, _TNonEmptyString),
4878
    ("ip_check", False, _TBool),
4879
    ("name_check", True, _TBool),
4880
    ]
4881

    
4882
  def CheckArguments(self):
4883
    """Check arguments.
4884

4885
    """
4886
    if self.op.ip_check and not self.op.name_check:
4887
      # TODO: make the ip check more flexible and not depend on the name check
4888
      raise errors.OpPrereqError("Cannot do ip check without a name check",
4889
                                 errors.ECODE_INVAL)
4890

    
4891
  def BuildHooksEnv(self):
4892
    """Build hooks env.
4893

4894
    This runs on master, primary and secondary nodes of the instance.
4895

4896
    """
4897
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4898
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4899
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4900
    return env, nl, nl
4901

    
4902
  def CheckPrereq(self):
4903
    """Check prerequisites.
4904

4905
    This checks that the instance is in the cluster and is not running.
4906

4907
    """
4908
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4909
                                                self.op.instance_name)
4910
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4911
    assert instance is not None
4912
    _CheckNodeOnline(self, instance.primary_node)
4913
    _CheckInstanceDown(self, instance, "cannot rename")
4914
    self.instance = instance
4915

    
4916
    new_name = self.op.new_name
4917
    if self.op.name_check:
4918
      hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4919
      new_name = hostinfo.name
4920
      if (self.op.ip_check and
4921
          netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4922
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4923
                                   (hostinfo.ip, new_name),
4924
                                   errors.ECODE_NOTUNIQUE)
4925

    
4926
    instance_list = self.cfg.GetInstanceList()
4927
    if new_name in instance_list:
4928
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4929
                                 new_name, errors.ECODE_EXISTS)
4930

    
4931

    
4932
  def Exec(self, feedback_fn):
4933
    """Reinstall the instance.
4934

4935
    """
4936
    inst = self.instance
4937
    old_name = inst.name
4938

    
4939
    if inst.disk_template == constants.DT_FILE:
4940
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4941

    
4942
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4943
    # Change the instance lock. This is definitely safe while we hold the BGL
4944
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4945
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4946

    
4947
    # re-read the instance from the configuration after rename
4948
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4949

    
4950
    if inst.disk_template == constants.DT_FILE:
4951
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4952
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4953
                                                     old_file_storage_dir,
4954
                                                     new_file_storage_dir)
4955
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4956
                   " (but the instance has been renamed in Ganeti)" %
4957
                   (inst.primary_node, old_file_storage_dir,
4958
                    new_file_storage_dir))
4959

    
4960
    _StartInstanceDisks(self, inst, None)
4961
    try:
4962
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4963
                                                 old_name, self.op.debug_level)
4964
      msg = result.fail_msg
4965
      if msg:
4966
        msg = ("Could not run OS rename script for instance %s on node %s"
4967
               " (but the instance has been renamed in Ganeti): %s" %
4968
               (inst.name, inst.primary_node, msg))
4969
        self.proc.LogWarning(msg)
4970
    finally:
4971
      _ShutdownInstanceDisks(self, inst)
4972

    
4973
    return inst.name
4974

    
4975

    
4976
class LURemoveInstance(LogicalUnit):
4977
  """Remove an instance.
4978

4979
  """
4980
  HPATH = "instance-remove"
4981
  HTYPE = constants.HTYPE_INSTANCE
4982
  _OP_PARAMS = [
4983
    _PInstanceName,
4984
    ("ignore_failures", False, _TBool),
4985
    _PShutdownTimeout,
4986
    ]
4987
  REQ_BGL = False
4988

    
4989
  def ExpandNames(self):
4990
    self._ExpandAndLockInstance()
4991
    self.needed_locks[locking.LEVEL_NODE] = []
4992
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4993

    
4994
  def DeclareLocks(self, level):
4995
    if level == locking.LEVEL_NODE:
4996
      self._LockInstancesNodes()
4997

    
4998
  def BuildHooksEnv(self):
4999
    """Build hooks env.
5000

5001
    This runs on master, primary and secondary nodes of the instance.
5002

5003
    """
5004
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5005
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5006
    nl = [self.cfg.GetMasterNode()]
5007
    nl_post = list(self.instance.all_nodes) + nl
5008
    return env, nl, nl_post
5009

    
5010
  def CheckPrereq(self):
5011
    """Check prerequisites.
5012

5013
    This checks that the instance is in the cluster.
5014

5015
    """
5016
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5017
    assert self.instance is not None, \
5018
      "Cannot retrieve locked instance %s" % self.op.instance_name
5019

    
5020
  def Exec(self, feedback_fn):
5021
    """Remove the instance.
5022

5023
    """
5024
    instance = self.instance
5025
    logging.info("Shutting down instance %s on node %s",
5026
                 instance.name, instance.primary_node)
5027

    
5028
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5029
                                             self.op.shutdown_timeout)
5030
    msg = result.fail_msg
5031
    if msg:
5032
      if self.op.ignore_failures:
5033
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5034
      else:
5035
        raise errors.OpExecError("Could not shutdown instance %s on"
5036
                                 " node %s: %s" %
5037
                                 (instance.name, instance.primary_node, msg))
5038

    
5039
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5040

    
5041

    
5042
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5043
  """Utility function to remove an instance.
5044

5045
  """
5046
  logging.info("Removing block devices for instance %s", instance.name)
5047

    
5048
  if not _RemoveDisks(lu, instance):
5049
    if not ignore_failures:
5050
      raise errors.OpExecError("Can't remove instance's disks")
5051
    feedback_fn("Warning: can't remove instance's disks")
5052

    
5053
  logging.info("Removing instance %s out of cluster config", instance.name)
5054

    
5055
  lu.cfg.RemoveInstance(instance.name)
5056

    
5057
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5058
    "Instance lock removal conflict"
5059

    
5060
  # Remove lock for the instance
5061
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5062

    
5063

    
5064
class LUQueryInstances(NoHooksLU):
5065
  """Logical unit for querying instances.
5066

5067
  """
5068
  # pylint: disable-msg=W0142
5069
  _OP_PARAMS = [
5070
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5071
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5072
    ("use_locking", False, _TBool),
5073
    ]
5074
  REQ_BGL = False
5075
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5076
                    "serial_no", "ctime", "mtime", "uuid"]
5077
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5078
                                    "admin_state",
5079
                                    "disk_template", "ip", "mac", "bridge",
5080
                                    "nic_mode", "nic_link",
5081
                                    "sda_size", "sdb_size", "vcpus", "tags",
5082
                                    "network_port", "beparams",
5083
                                    r"(disk)\.(size)/([0-9]+)",
5084
                                    r"(disk)\.(sizes)", "disk_usage",
5085
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5086
                                    r"(nic)\.(bridge)/([0-9]+)",
5087
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5088
                                    r"(disk|nic)\.(count)",
5089
                                    "hvparams",
5090
                                    ] + _SIMPLE_FIELDS +
5091
                                  ["hv/%s" % name
5092
                                   for name in constants.HVS_PARAMETERS
5093
                                   if name not in constants.HVC_GLOBALS] +
5094
                                  ["be/%s" % name
5095
                                   for name in constants.BES_PARAMETERS])
5096
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5097
                                   "oper_ram",
5098
                                   "oper_vcpus",
5099
                                   "status")
5100

    
5101

    
5102
  def CheckArguments(self):
5103
    _CheckOutputFields(static=self._FIELDS_STATIC,
5104
                       dynamic=self._FIELDS_DYNAMIC,
5105
                       selected=self.op.output_fields)
5106

    
5107
  def ExpandNames(self):
5108
    self.needed_locks = {}
5109
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5110
    self.share_locks[locking.LEVEL_NODE] = 1
5111

    
5112
    if self.op.names:
5113
      self.wanted = _GetWantedInstances(self, self.op.names)
5114
    else:
5115
      self.wanted = locking.ALL_SET
5116

    
5117
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5118
    self.do_locking = self.do_node_query and self.op.use_locking
5119
    if self.do_locking:
5120
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5121
      self.needed_locks[locking.LEVEL_NODE] = []
5122
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5123

    
5124
  def DeclareLocks(self, level):
5125
    if level == locking.LEVEL_NODE and self.do_locking:
5126
      self._LockInstancesNodes()
5127

    
5128
  def Exec(self, feedback_fn):
5129
    """Computes the list of nodes and their attributes.
5130

5131
    """
5132
    # pylint: disable-msg=R0912
5133
    # way too many branches here
5134
    all_info = self.cfg.GetAllInstancesInfo()
5135
    if self.wanted == locking.ALL_SET:
5136
      # caller didn't specify instance names, so ordering is not important
5137
      if self.do_locking:
5138
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5139
      else:
5140
        instance_names = all_info.keys()
5141
      instance_names = utils.NiceSort(instance_names)
5142
    else:
5143
      # caller did specify names, so we must keep the ordering
5144
      if self.do_locking:
5145
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5146
      else:
5147
        tgt_set = all_info.keys()
5148
      missing = set(self.wanted).difference(tgt_set)
5149
      if missing:
5150
        raise errors.OpExecError("Some instances were removed before"
5151
                                 " retrieving their data: %s" % missing)
5152
      instance_names = self.wanted
5153

    
5154
    instance_list = [all_info[iname] for iname in instance_names]
5155

    
5156
    # begin data gathering
5157

    
5158
    nodes = frozenset([inst.primary_node for inst in instance_list])
5159
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5160

    
5161
    bad_nodes = []
5162
    off_nodes = []
5163
    if self.do_node_query:
5164
      live_data = {}
5165
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5166
      for name in nodes:
5167
        result = node_data[name]
5168
        if result.offline:
5169
          # offline nodes will be in both lists
5170
          off_nodes.append(name)
5171
        if result.fail_msg:
5172
          bad_nodes.append(name)
5173
        else:
5174
          if result.payload:
5175
            live_data.update(result.payload)
5176
          # else no instance is alive
5177
    else:
5178
      live_data = dict([(name, {}) for name in instance_names])
5179

    
5180
    # end data gathering
5181

    
5182
    HVPREFIX = "hv/"
5183
    BEPREFIX = "be/"
5184
    output = []
5185
    cluster = self.cfg.GetClusterInfo()
5186
    for instance in instance_list:
5187
      iout = []
5188
      i_hv = cluster.FillHV(instance, skip_globals=True)
5189
      i_be = cluster.FillBE(instance)
5190
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5191
      for field in self.op.output_fields:
5192
        st_match = self._FIELDS_STATIC.Matches(field)
5193
        if field in self._SIMPLE_FIELDS:
5194
          val = getattr(instance, field)
5195
        elif field == "pnode":
5196
          val = instance.primary_node
5197
        elif field == "snodes":
5198
          val = list(instance.secondary_nodes)
5199
        elif field == "admin_state":
5200
          val = instance.admin_up
5201
        elif field == "oper_state":
5202
          if instance.primary_node in bad_nodes:
5203
            val = None
5204
          else:
5205
            val = bool(live_data.get(instance.name))
5206
        elif field == "status":
5207
          if instance.primary_node in off_nodes:
5208
            val = "ERROR_nodeoffline"
5209
          elif instance.primary_node in bad_nodes:
5210
            val = "ERROR_nodedown"
5211
          else:
5212
            running = bool(live_data.get(instance.name))
5213
            if running:
5214
              if instance.admin_up:
5215
                val = "running"
5216
              else:
5217
                val = "ERROR_up"
5218
            else:
5219
              if instance.admin_up:
5220
                val = "ERROR_down"
5221
              else:
5222
                val = "ADMIN_down"
5223
        elif field == "oper_ram":
5224
          if instance.primary_node in bad_nodes:
5225
            val = None
5226
          elif instance.name in live_data:
5227
            val = live_data[instance.name].get("memory", "?")
5228
          else:
5229
            val = "-"
5230
        elif field == "oper_vcpus":
5231
          if instance.primary_node in bad_nodes:
5232
            val = None
5233
          elif instance.name in live_data:
5234
            val = live_data[instance.name].get("vcpus", "?")
5235
          else:
5236
            val = "-"
5237
        elif field == "vcpus":
5238
          val = i_be[constants.BE_VCPUS]
5239
        elif field == "disk_template":
5240
          val = instance.disk_template
5241
        elif field == "ip":
5242
          if instance.nics:
5243
            val = instance.nics[0].ip
5244
          else:
5245
            val = None
5246
        elif field == "nic_mode":
5247
          if instance.nics:
5248
            val = i_nicp[0][constants.NIC_MODE]
5249
          else:
5250
            val = None
5251
        elif field == "nic_link":
5252
          if instance.nics:
5253
            val = i_nicp[0][constants.NIC_LINK]
5254
          else:
5255
            val = None
5256
        elif field == "bridge":
5257
          if (instance.nics and
5258
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5259
            val = i_nicp[0][constants.NIC_LINK]
5260
          else:
5261
            val = None
5262
        elif field == "mac":
5263
          if instance.nics:
5264
            val = instance.nics[0].mac
5265
          else:
5266
            val = None
5267
        elif field == "sda_size" or field == "sdb_size":
5268
          idx = ord(field[2]) - ord('a')
5269
          try:
5270
            val = instance.FindDisk(idx).size
5271
          except errors.OpPrereqError:
5272
            val = None
5273
        elif field == "disk_usage": # total disk usage per node
5274
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5275
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5276
        elif field == "tags":
5277
          val = list(instance.GetTags())
5278
        elif field == "hvparams":
5279
          val = i_hv
5280
        elif (field.startswith(HVPREFIX) and
5281
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5282
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5283
          val = i_hv.get(field[len(HVPREFIX):], None)
5284
        elif field == "beparams":
5285
          val = i_be
5286
        elif (field.startswith(BEPREFIX) and
5287
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5288
          val = i_be.get(field[len(BEPREFIX):], None)
5289
        elif st_match and st_match.groups():
5290
          # matches a variable list
5291
          st_groups = st_match.groups()
5292
          if st_groups and st_groups[0] == "disk":
5293
            if st_groups[1] == "count":
5294
              val = len(instance.disks)
5295
            elif st_groups[1] == "sizes":
5296
              val = [disk.size for disk in instance.disks]
5297
            elif st_groups[1] == "size":
5298
              try:
5299
                val = instance.FindDisk(st_groups[2]).size
5300
              except errors.OpPrereqError:
5301
                val = None
5302
            else:
5303
              assert False, "Unhandled disk parameter"
5304
          elif st_groups[0] == "nic":
5305
            if st_groups[1] == "count":
5306
              val = len(instance.nics)
5307
            elif st_groups[1] == "macs":
5308
              val = [nic.mac for nic in instance.nics]
5309
            elif st_groups[1] == "ips":
5310
              val = [nic.ip for nic in instance.nics]
5311
            elif st_groups[1] == "modes":
5312
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5313
            elif st_groups[1] == "links":
5314
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5315
            elif st_groups[1] == "bridges":
5316
              val = []
5317
              for nicp in i_nicp:
5318
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5319
                  val.append(nicp[constants.NIC_LINK])
5320
                else:
5321
                  val.append(None)
5322
            else:
5323
              # index-based item
5324
              nic_idx = int(st_groups[2])
5325
              if nic_idx >= len(instance.nics):
5326
                val = None
5327
              else:
5328
                if st_groups[1] == "mac":
5329
                  val = instance.nics[nic_idx].mac
5330
                elif st_groups[1] == "ip":
5331
                  val = instance.nics[nic_idx].ip
5332
                elif st_groups[1] == "mode":
5333
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5334
                elif st_groups[1] == "link":
5335
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5336
                elif st_groups[1] == "bridge":
5337
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5338
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5339
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5340
                  else:
5341
                    val = None
5342
                else:
5343
                  assert False, "Unhandled NIC parameter"
5344
          else:
5345
            assert False, ("Declared but unhandled variable parameter '%s'" %
5346
                           field)
5347
        else:
5348
          assert False, "Declared but unhandled parameter '%s'" % field
5349
        iout.append(val)
5350
      output.append(iout)
5351

    
5352
    return output
5353

    
5354

    
5355
class LUFailoverInstance(LogicalUnit):
5356
  """Failover an instance.
5357

5358
  """
5359
  HPATH = "instance-failover"
5360
  HTYPE = constants.HTYPE_INSTANCE
5361
  _OP_PARAMS = [
5362
    _PInstanceName,
5363
    ("ignore_consistency", False, _TBool),
5364
    _PShutdownTimeout,
5365
    ]
5366
  REQ_BGL = False
5367

    
5368
  def ExpandNames(self):
5369
    self._ExpandAndLockInstance()
5370
    self.needed_locks[locking.LEVEL_NODE] = []
5371
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5372

    
5373
  def DeclareLocks(self, level):
5374
    if level == locking.LEVEL_NODE:
5375
      self._LockInstancesNodes()
5376

    
5377
  def BuildHooksEnv(self):
5378
    """Build hooks env.
5379

5380
    This runs on master, primary and secondary nodes of the instance.
5381

5382
    """
5383
    instance = self.instance
5384
    source_node = instance.primary_node
5385
    target_node = instance.secondary_nodes[0]
5386
    env = {
5387
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5388
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5389
      "OLD_PRIMARY": source_node,
5390
      "OLD_SECONDARY": target_node,
5391
      "NEW_PRIMARY": target_node,
5392
      "NEW_SECONDARY": source_node,
5393
      }
5394
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5395
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5396
    nl_post = list(nl)
5397
    nl_post.append(source_node)
5398
    return env, nl, nl_post
5399

    
5400
  def CheckPrereq(self):
5401
    """Check prerequisites.
5402

5403
    This checks that the instance is in the cluster.
5404

5405
    """
5406
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5407
    assert self.instance is not None, \
5408
      "Cannot retrieve locked instance %s" % self.op.instance_name
5409

    
5410
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5411
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5412
      raise errors.OpPrereqError("Instance's disk layout is not"
5413
                                 " network mirrored, cannot failover.",
5414
                                 errors.ECODE_STATE)
5415

    
5416
    secondary_nodes = instance.secondary_nodes
5417
    if not secondary_nodes:
5418
      raise errors.ProgrammerError("no secondary node but using "
5419
                                   "a mirrored disk template")
5420

    
5421
    target_node = secondary_nodes[0]
5422
    _CheckNodeOnline(self, target_node)
5423
    _CheckNodeNotDrained(self, target_node)
5424
    if instance.admin_up:
5425
      # check memory requirements on the secondary node
5426
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5427
                           instance.name, bep[constants.BE_MEMORY],
5428
                           instance.hypervisor)
5429
    else:
5430
      self.LogInfo("Not checking memory on the secondary node as"
5431
                   " instance will not be started")
5432

    
5433
    # check bridge existence
5434
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5435

    
5436
  def Exec(self, feedback_fn):
5437
    """Failover an instance.
5438

5439
    The failover is done by shutting it down on its present node and
5440
    starting it on the secondary.
5441

5442
    """
5443
    instance = self.instance
5444

    
5445
    source_node = instance.primary_node
5446
    target_node = instance.secondary_nodes[0]
5447

    
5448
    if instance.admin_up:
5449
      feedback_fn("* checking disk consistency between source and target")
5450
      for dev in instance.disks:
5451
        # for drbd, these are drbd over lvm
5452
        if not _CheckDiskConsistency(self, dev, target_node, False):
5453
          if not self.op.ignore_consistency:
5454
            raise errors.OpExecError("Disk %s is degraded on target node,"
5455
                                     " aborting failover." % dev.iv_name)
5456
    else:
5457
      feedback_fn("* not checking disk consistency as instance is not running")
5458

    
5459
    feedback_fn("* shutting down instance on source node")
5460
    logging.info("Shutting down instance %s on node %s",
5461
                 instance.name, source_node)
5462

    
5463
    result = self.rpc.call_instance_shutdown(source_node, instance,
5464
                                             self.op.shutdown_timeout)
5465
    msg = result.fail_msg
5466
    if msg:
5467
      if self.op.ignore_consistency:
5468
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5469
                             " Proceeding anyway. Please make sure node"
5470
                             " %s is down. Error details: %s",
5471
                             instance.name, source_node, source_node, msg)
5472
      else:
5473
        raise errors.OpExecError("Could not shutdown instance %s on"
5474
                                 " node %s: %s" %
5475
                                 (instance.name, source_node, msg))
5476

    
5477
    feedback_fn("* deactivating the instance's disks on source node")
5478
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5479
      raise errors.OpExecError("Can't shut down the instance's disks.")
5480

    
5481
    instance.primary_node = target_node
5482
    # distribute new instance config to the other nodes
5483
    self.cfg.Update(instance, feedback_fn)
5484

    
5485
    # Only start the instance if it's marked as up
5486
    if instance.admin_up:
5487
      feedback_fn("* activating the instance's disks on target node")
5488
      logging.info("Starting instance %s on node %s",
5489
                   instance.name, target_node)
5490

    
5491
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5492
                                           ignore_secondaries=True)
5493
      if not disks_ok:
5494
        _ShutdownInstanceDisks(self, instance)
5495
        raise errors.OpExecError("Can't activate the instance's disks")
5496

    
5497
      feedback_fn("* starting the instance on the target node")
5498
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5499
      msg = result.fail_msg
5500
      if msg:
5501
        _ShutdownInstanceDisks(self, instance)
5502
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5503
                                 (instance.name, target_node, msg))
5504

    
5505

    
5506
class LUMigrateInstance(LogicalUnit):
5507
  """Migrate an instance.
5508

5509
  This is migration without shutting down, compared to the failover,
5510
  which is done with shutdown.
5511

5512
  """
5513
  HPATH = "instance-migrate"
5514
  HTYPE = constants.HTYPE_INSTANCE
5515
  _OP_PARAMS = [
5516
    _PInstanceName,
5517
    _PMigrationMode,
5518
    _PMigrationLive,
5519
    ("cleanup", False, _TBool),
5520
    ]
5521

    
5522
  REQ_BGL = False
5523

    
5524
  def ExpandNames(self):
5525
    self._ExpandAndLockInstance()
5526

    
5527
    self.needed_locks[locking.LEVEL_NODE] = []
5528
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5529

    
5530
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5531
                                       self.op.cleanup)
5532
    self.tasklets = [self._migrater]
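    # The actual CheckPrereq/Exec work of this LU is delegated to the
    # TLMigrateInstance tasklet registered here; this class itself only
    # sets up locking and the hooks environment.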
5533

    
5534
  def DeclareLocks(self, level):
5535
    if level == locking.LEVEL_NODE:
5536
      self._LockInstancesNodes()
5537

    
5538
  def BuildHooksEnv(self):
5539
    """Build hooks env.
5540

5541
    This runs on master, primary and secondary nodes of the instance.
5542

5543
    """
5544
    instance = self._migrater.instance
5545
    source_node = instance.primary_node
5546
    target_node = instance.secondary_nodes[0]
5547
    env = _BuildInstanceHookEnvByObject(self, instance)
5548
    env["MIGRATE_LIVE"] = self._migrater.live
5549
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5550
    env.update({
5551
        "OLD_PRIMARY": source_node,
5552
        "OLD_SECONDARY": target_node,
5553
        "NEW_PRIMARY": target_node,
5554
        "NEW_SECONDARY": source_node,
5555
        })
5556
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5557
    nl_post = list(nl)
5558
    nl_post.append(source_node)
5559
    return env, nl, nl_post
5560

    
5561

    
5562
class LUMoveInstance(LogicalUnit):
5563
  """Move an instance by data-copying.
5564

5565
  """
5566
  HPATH = "instance-move"
5567
  HTYPE = constants.HTYPE_INSTANCE
5568
  _OP_PARAMS = [
5569
    _PInstanceName,
5570
    ("target_node", _NoDefault, _TNonEmptyString),
5571
    _PShutdownTimeout,
5572
    ]
5573
  REQ_BGL = False
5574

    
5575
  def ExpandNames(self):
5576
    self._ExpandAndLockInstance()
5577
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5578
    self.op.target_node = target_node
5579
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5580
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5581

    
5582
  def DeclareLocks(self, level):
5583
    if level == locking.LEVEL_NODE:
5584
      self._LockInstancesNodes(primary_only=True)
5585

    
5586
  def BuildHooksEnv(self):
5587
    """Build hooks env.
5588

5589
    This runs on master, primary and secondary nodes of the instance.
5590

5591
    """
5592
    env = {
5593
      "TARGET_NODE": self.op.target_node,
5594
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5595
      }
5596
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5597
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5598
                                       self.op.target_node]
5599
    return env, nl, nl
5600

    
5601
  def CheckPrereq(self):
5602
    """Check prerequisites.
5603

5604
    This checks that the instance is in the cluster.
5605

5606
    """
5607
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5608
    assert self.instance is not None, \
5609
      "Cannot retrieve locked instance %s" % self.op.instance_name
5610

    
5611
    node = self.cfg.GetNodeInfo(self.op.target_node)
5612
    assert node is not None, \
5613
      "Cannot retrieve locked node %s" % self.op.target_node
5614

    
5615
    self.target_node = target_node = node.name
5616

    
5617
    if target_node == instance.primary_node:
5618
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5619
                                 (instance.name, target_node),
5620
                                 errors.ECODE_STATE)
5621

    
5622
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5623

    
5624
    for idx, dsk in enumerate(instance.disks):
5625
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5626
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5627
                                   " cannot copy" % idx, errors.ECODE_STATE)
5628

    
5629
    _CheckNodeOnline(self, target_node)
5630
    _CheckNodeNotDrained(self, target_node)
5631

    
5632
    if instance.admin_up:
5633
      # check memory requirements on the secondary node
5634
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5635
                           instance.name, bep[constants.BE_MEMORY],
5636
                           instance.hypervisor)
5637
    else:
5638
      self.LogInfo("Not checking memory on the secondary node as"
5639
                   " instance will not be started")
5640

    
5641
    # check bridge existence
5642
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5643

    
5644
  def Exec(self, feedback_fn):
5645
    """Move an instance.
5646

5647
    The move is done by shutting it down on its present node, copying
5648
    the data over (slow) and starting it on the new node.
5649

5650
    """
5651
    instance = self.instance
5652

    
5653
    source_node = instance.primary_node
5654
    target_node = self.target_node
5655

    
5656
    self.LogInfo("Shutting down instance %s on source node %s",
5657
                 instance.name, source_node)
5658

    
5659
    result = self.rpc.call_instance_shutdown(source_node, instance,
5660
                                             self.op.shutdown_timeout)
5661
    msg = result.fail_msg
5662
    if msg:
5663
      if self.op.ignore_consistency:
5664
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5665
                             " Proceeding anyway. Please make sure node"
5666
                             " %s is down. Error details: %s",
5667
                             instance.name, source_node, source_node, msg)
5668
      else:
5669
        raise errors.OpExecError("Could not shutdown instance %s on"
5670
                                 " node %s: %s" %
5671
                                 (instance.name, source_node, msg))
5672

    
5673
    # create the target disks
5674
    try:
5675
      _CreateDisks(self, instance, target_node=target_node)
5676
    except errors.OpExecError:
5677
      self.LogWarning("Device creation failed, reverting...")
5678
      try:
5679
        _RemoveDisks(self, instance, target_node=target_node)
5680
      finally:
5681
        self.cfg.ReleaseDRBDMinors(instance.name)
5682
        raise
5683

    
5684
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5685

    
5686
    errs = []
5687
    # activate, get path, copy the data over
5688
    for idx, disk in enumerate(instance.disks):
5689
      self.LogInfo("Copying data for disk %d", idx)
5690
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5691
                                               instance.name, True)
5692
      if result.fail_msg:
5693
        self.LogWarning("Can't assemble newly created disk %d: %s",
5694
                        idx, result.fail_msg)
5695
        errs.append(result.fail_msg)
5696
        break
5697
      dev_path = result.payload
5698
      result = self.rpc.call_blockdev_export(source_node, disk,
5699
                                             target_node, dev_path,
5700
                                             cluster_name)
5701
      if result.fail_msg:
5702
        self.LogWarning("Can't copy data over for disk %d: %s",
5703
                        idx, result.fail_msg)
5704
        errs.append(result.fail_msg)
5705
        break
5706

    
5707
    if errs:
5708
      self.LogWarning("Some disks failed to copy, aborting")
5709
      try:
5710
        _RemoveDisks(self, instance, target_node=target_node)
5711
      finally:
5712
        self.cfg.ReleaseDRBDMinors(instance.name)
5713
        raise errors.OpExecError("Errors during disk copy: %s" %
5714
                                 (",".join(errs),))
5715

    
5716
    instance.primary_node = target_node
5717
    self.cfg.Update(instance, feedback_fn)
5718

    
5719
    self.LogInfo("Removing the disks on the original node")
5720
    _RemoveDisks(self, instance, target_node=source_node)
5721

    
5722
    # Only start the instance if it's marked as up
5723
    if instance.admin_up:
5724
      self.LogInfo("Starting instance %s on node %s",
5725
                   instance.name, target_node)
5726

    
5727
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5728
                                           ignore_secondaries=True)
5729
      if not disks_ok:
5730
        _ShutdownInstanceDisks(self, instance)
5731
        raise errors.OpExecError("Can't activate the instance's disks")
5732

    
5733
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5734
      msg = result.fail_msg
5735
      if msg:
5736
        _ShutdownInstanceDisks(self, instance)
5737
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5738
                                 (instance.name, target_node, msg))
5739

    
5740

    
5741
class LUMigrateNode(LogicalUnit):
5742
  """Migrate all instances from a node.
5743

5744
  """
5745
  HPATH = "node-migrate"
5746
  HTYPE = constants.HTYPE_NODE
5747
  _OP_PARAMS = [
5748
    _PNodeName,
5749
    _PMigrationMode,
5750
    _PMigrationLive,
5751
    ]
5752
  REQ_BGL = False
5753

    
5754
  def ExpandNames(self):
5755
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5756

    
5757
    self.needed_locks = {
5758
      locking.LEVEL_NODE: [self.op.node_name],
5759
      }
5760

    
5761
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5762

    
5763
    # Create tasklets for migrating instances for all instances on this node
5764
    names = []
5765
    tasklets = []
5766

    
5767
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5768
      logging.debug("Migrating instance %s", inst.name)
5769
      names.append(inst.name)
5770

    
5771
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5772

    
5773
    self.tasklets = tasklets
5774

    
5775
    # Declare instance locks
5776
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5777

    
5778
  def DeclareLocks(self, level):
5779
    if level == locking.LEVEL_NODE:
5780
      self._LockInstancesNodes()
5781

    
5782
  def BuildHooksEnv(self):
5783
    """Build hooks env.
5784

5785
    This runs on the master, the primary and all the secondaries.
5786

5787
    """
5788
    env = {
5789
      "NODE_NAME": self.op.node_name,
5790
      }
5791

    
5792
    nl = [self.cfg.GetMasterNode()]
5793

    
5794
    return (env, nl, nl)
5795

    
5796

    
5797
class TLMigrateInstance(Tasklet):
5798
  """Tasklet class for instance migration.
5799

5800
  @type live: boolean
5801
  @ivar live: whether the migration will be done live or non-live;
5802
      this variable is initalized only after CheckPrereq has run
5803

5804
  """
5805
  def __init__(self, lu, instance_name, cleanup):
5806
    """Initializes this class.
5807

5808
    """
5809
    Tasklet.__init__(self, lu)
5810

    
5811
    # Parameters
5812
    self.instance_name = instance_name
5813
    self.cleanup = cleanup
5814
    self.live = False # will be overridden later
5815

    
5816
  def CheckPrereq(self):
5817
    """Check prerequisites.
5818

5819
    This checks that the instance is in the cluster.
5820

5821
    """
5822
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5823
    instance = self.cfg.GetInstanceInfo(instance_name)
5824
    assert instance is not None
5825

    
5826
    if instance.disk_template != constants.DT_DRBD8:
5827
      raise errors.OpPrereqError("Instance's disk layout is not"
5828
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5829

    
5830
    secondary_nodes = instance.secondary_nodes
5831
    if not secondary_nodes:
5832
      raise errors.ConfigurationError("No secondary node but using"
5833
                                      " drbd8 disk template")
5834

    
5835
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5836

    
5837
    target_node = secondary_nodes[0]
5838
    # check memory requirements on the secondary node
5839
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5840
                         instance.name, i_be[constants.BE_MEMORY],
5841
                         instance.hypervisor)
5842

    
5843
    # check bridge existance
5844
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5845

    
5846
    if not self.cleanup:
5847
      _CheckNodeNotDrained(self.lu, target_node)
5848
      result = self.rpc.call_instance_migratable(instance.primary_node,
5849
                                                 instance)
5850
      result.Raise("Can't migrate, please use failover",
5851
                   prereq=True, ecode=errors.ECODE_STATE)
5852

    
5853
    self.instance = instance
5854

    
5855
    if self.lu.op.live is not None and self.lu.op.mode is not None:
5856
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5857
                                 " parameters are accepted",
5858
                                 errors.ECODE_INVAL)
5859
    if self.lu.op.live is not None:
5860
      if self.lu.op.live:
5861
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
5862
      else:
5863
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5864
      # reset the 'live' parameter to None so that repeated
5865
      # invocations of CheckPrereq do not raise an exception
5866
      self.lu.op.live = None
5867
    elif self.lu.op.mode is None:
5868
      # read the default value from the hypervisor
5869
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5870
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5871

    
5872
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5873

    
5874
  def _WaitUntilSync(self):
5875
    """Poll with custom rpc for disk sync.
5876

5877
    This uses our own step-based rpc call.
5878

5879
    """
5880
    self.feedback_fn("* wait until resync is done")
5881
    all_done = False
5882
    while not all_done:
5883
      all_done = True
5884
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5885
                                            self.nodes_ip,
5886
                                            self.instance.disks)
5887
      min_percent = 100
5888
      for node, nres in result.items():
5889
        nres.Raise("Cannot resync disks on node %s" % node)
5890
        node_done, node_percent = nres.payload
5891
        all_done = all_done and node_done
5892
        if node_percent is not None:
5893
          min_percent = min(min_percent, node_percent)
5894
      if not all_done:
5895
        if min_percent < 100:
5896
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5897
        time.sleep(2)
5898

    
5899
  def _EnsureSecondary(self, node):
5900
    """Demote a node to secondary.
5901

5902
    """
5903
    self.feedback_fn("* switching node %s to secondary mode" % node)
5904

    
5905
    for dev in self.instance.disks:
5906
      self.cfg.SetDiskID(dev, node)
5907

    
5908
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5909
                                          self.instance.disks)
5910
    result.Raise("Cannot change disk to secondary on node %s" % node)
5911

    
5912
  def _GoStandalone(self):
5913
    """Disconnect from the network.
5914

5915
    """
5916
    self.feedback_fn("* changing into standalone mode")
5917
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5918
                                               self.instance.disks)
5919
    for node, nres in result.items():
5920
      nres.Raise("Cannot disconnect disks node %s" % node)
5921

    
5922
  def _GoReconnect(self, multimaster):
5923
    """Reconnect to the network.
5924

5925
    """
5926
    if multimaster:
5927
      msg = "dual-master"
5928
    else:
5929
      msg = "single-master"
5930
    self.feedback_fn("* changing disks into %s mode" % msg)
5931
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5932
                                           self.instance.disks,
5933
                                           self.instance.name, multimaster)
5934
    for node, nres in result.items():
5935
      nres.Raise("Cannot change disks config on node %s" % node)
5936

    
5937
  def _ExecCleanup(self):
5938
    """Try to cleanup after a failed migration.
5939

5940
    The cleanup is done by:
5941
      - check that the instance is running only on one node
5942
        (and update the config if needed)
5943
      - change disks on its secondary node to secondary
5944
      - wait until disks are fully synchronized
5945
      - disconnect from the network
5946
      - change disks into single-master mode
5947
      - wait again until disks are fully synchronized
5948

5949
    """
5950
    instance = self.instance
5951
    target_node = self.target_node
5952
    source_node = self.source_node
5953

    
5954
    # check running on only one node
5955
    self.feedback_fn("* checking where the instance actually runs"
5956
                     " (if this hangs, the hypervisor might be in"
5957
                     " a bad state)")
5958
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5959
    for node, result in ins_l.items():
5960
      result.Raise("Can't contact node %s" % node)
5961

    
5962
    runningon_source = instance.name in ins_l[source_node].payload
5963
    runningon_target = instance.name in ins_l[target_node].payload
5964

    
5965
    if runningon_source and runningon_target:
5966
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5967
                               " or the hypervisor is confused. You will have"
5968
                               " to ensure manually that it runs only on one"
5969
                               " and restart this operation.")
5970

    
5971
    if not (runningon_source or runningon_target):
5972
      raise errors.OpExecError("Instance does not seem to be running at all."
5973
                               " In this case, it's safer to repair by"
5974
                               " running 'gnt-instance stop' to ensure disk"
5975
                               " shutdown, and then restarting it.")
5976

    
5977
    if runningon_target:
5978
      # the migration has actually succeeded, we need to update the config
5979
      self.feedback_fn("* instance running on secondary node (%s),"
5980
                       " updating config" % target_node)
5981
      instance.primary_node = target_node
5982
      self.cfg.Update(instance, self.feedback_fn)
5983
      demoted_node = source_node
5984
    else:
5985
      self.feedback_fn("* instance confirmed to be running on its"
5986
                       " primary node (%s)" % source_node)
5987
      demoted_node = target_node
5988

    
5989
    self._EnsureSecondary(demoted_node)
5990
    try:
5991
      self._WaitUntilSync()
5992
    except errors.OpExecError:
5993
      # we ignore here errors, since if the device is standalone, it
5994
      # won't be able to sync
5995
      pass
5996
    self._GoStandalone()
5997
    self._GoReconnect(False)
5998
    self._WaitUntilSync()
5999

    
6000
    self.feedback_fn("* done")
6001

    
6002
  def _RevertDiskStatus(self):
6003
    """Try to revert the disk status after a failed migration.
6004

6005
    """
6006
    target_node = self.target_node
6007
    try:
6008
      self._EnsureSecondary(target_node)
6009
      self._GoStandalone()
6010
      self._GoReconnect(False)
6011
      self._WaitUntilSync()
6012
    except errors.OpExecError, err:
6013
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6014
                         " drives: error '%s'\n"
6015
                         "Please look and recover the instance status" %
6016
                         str(err))
6017

    
6018
  def _AbortMigration(self):
6019
    """Call the hypervisor code to abort a started migration.
6020

6021
    """
6022
    instance = self.instance
6023
    target_node = self.target_node
6024
    migration_info = self.migration_info
6025

    
6026
    abort_result = self.rpc.call_finalize_migration(target_node,
6027
                                                    instance,
6028
                                                    migration_info,
6029
                                                    False)
6030
    abort_msg = abort_result.fail_msg
6031
    if abort_msg:
6032
      logging.error("Aborting migration failed on target node %s: %s",
6033
                    target_node, abort_msg)
6034
      # Don't raise an exception here, as we stil have to try to revert the
6035
      # disk status, even if this step failed.
6036

    
6037
  def _ExecMigration(self):
6038
    """Migrate an instance.
6039

6040
    The migrate is done by:
6041
      - change the disks into dual-master mode
6042
      - wait until disks are fully synchronized again
6043
      - migrate the instance
6044
      - change disks on the new secondary node (the old primary) to secondary
6045
      - wait until disks are fully synchronized
6046
      - change disks into single-master mode
6047

6048
    """
6049
    instance = self.instance
6050
    target_node = self.target_node
6051
    source_node = self.source_node
6052

    
6053
    self.feedback_fn("* checking disk consistency between source and target")
6054
    for dev in instance.disks:
6055
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6056
        raise errors.OpExecError("Disk %s is degraded or not fully"
6057
                                 " synchronized on target node,"
6058
                                 " aborting migrate." % dev.iv_name)
6059

    
6060
    # First get the migration information from the remote node
6061
    result = self.rpc.call_migration_info(source_node, instance)
6062
    msg = result.fail_msg
6063
    if msg:
6064
      log_err = ("Failed fetching source migration information from %s: %s" %
6065
                 (source_node, msg))
6066
      logging.error(log_err)
6067
      raise errors.OpExecError(log_err)
6068

    
6069
    self.migration_info = migration_info = result.payload
6070

    
6071
    # Then switch the disks to master/master mode
6072
    self._EnsureSecondary(target_node)
6073
    self._GoStandalone()
6074
    self._GoReconnect(True)
6075
    self._WaitUntilSync()
6076

    
6077
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6078
    result = self.rpc.call_accept_instance(target_node,
6079
                                           instance,
6080
                                           migration_info,
6081
                                           self.nodes_ip[target_node])
6082

    
6083
    msg = result.fail_msg
6084
    if msg:
6085
      logging.error("Instance pre-migration failed, trying to revert"
6086
                    " disk status: %s", msg)
6087
      self.feedback_fn("Pre-migration failed, aborting")
6088
      self._AbortMigration()
6089
      self._RevertDiskStatus()
6090
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6091
                               (instance.name, msg))
6092

    
6093
    self.feedback_fn("* migrating instance to %s" % target_node)
6094
    time.sleep(10)
6095
    result = self.rpc.call_instance_migrate(source_node, instance,
6096
                                            self.nodes_ip[target_node],
6097
                                            self.live)
6098
    msg = result.fail_msg
6099
    if msg:
6100
      logging.error("Instance migration failed, trying to revert"
6101
                    " disk status: %s", msg)
6102
      self.feedback_fn("Migration failed, aborting")
6103
      self._AbortMigration()
6104
      self._RevertDiskStatus()
6105
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6106
                               (instance.name, msg))
6107
    time.sleep(10)
6108

    
6109
    instance.primary_node = target_node
6110
    # distribute new instance config to the other nodes
6111
    self.cfg.Update(instance, self.feedback_fn)
6112

    
6113
    result = self.rpc.call_finalize_migration(target_node,
6114
                                              instance,
6115
                                              migration_info,
6116
                                              True)
6117
    msg = result.fail_msg
6118
    if msg:
6119
      logging.error("Instance migration succeeded, but finalization failed:"
6120
                    " %s", msg)
6121
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6122
                               msg)
6123

    
6124
    self._EnsureSecondary(source_node)
6125
    self._WaitUntilSync()
6126
    self._GoStandalone()
6127
    self._GoReconnect(False)
6128
    self._WaitUntilSync()
6129

    
6130
    self.feedback_fn("* done")
6131

    
6132
  def Exec(self, feedback_fn):
6133
    """Perform the migration.
6134

6135
    """
6136
    feedback_fn("Migrating instance %s" % self.instance.name)
6137

    
6138
    self.feedback_fn = feedback_fn
6139

    
6140
    self.source_node = self.instance.primary_node
6141
    self.target_node = self.instance.secondary_nodes[0]
6142
    self.all_nodes = [self.source_node, self.target_node]
6143
    self.nodes_ip = {
6144
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6145
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6146
      }
6147

    
6148
    if self.cleanup:
6149
      return self._ExecCleanup()
6150
    else:
6151
      return self._ExecMigration()
6152

    
6153

    
6154
def _CreateBlockDev(lu, node, instance, device, force_create,
6155
                    info, force_open):
6156
  """Create a tree of block devices on a given node.
6157

6158
  If this device type has to be created on secondaries, create it and
6159
  all its children.
6160

6161
  If not, just recurse to children keeping the same 'force' value.
6162

6163
  @param lu: the lu on whose behalf we execute
6164
  @param node: the node on which to create the device
6165
  @type instance: L{objects.Instance}
6166
  @param instance: the instance which owns the device
6167
  @type device: L{objects.Disk}
6168
  @param device: the device to create
6169
  @type force_create: boolean
6170
  @param force_create: whether to force creation of this device; this
6171
      will be change to True whenever we find a device which has
6172
      CreateOnSecondary() attribute
6173
  @param info: the extra 'metadata' we should attach to the device
6174
      (this will be represented as a LVM tag)
6175
  @type force_open: boolean
6176
  @param force_open: this parameter will be passes to the
6177
      L{backend.BlockdevCreate} function where it specifies
6178
      whether we run on primary or not, and it affects both
6179
      the child assembly and the device own Open() execution
6180

6181
  """
6182
  if device.CreateOnSecondary():
6183
    force_create = True
6184

    
6185
  if device.children:
6186
    for child in device.children:
6187
      _CreateBlockDev(lu, node, instance, child, force_create,
6188
                      info, force_open)
6189

    
6190
  if not force_create:
6191
    return
6192

    
6193
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6194

    
6195

    
6196
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6197
  """Create a single block device on a given node.
6198

6199
  This will not recurse over children of the device, so they must be
6200
  created in advance.
6201

6202
  @param lu: the lu on whose behalf we execute
6203
  @param node: the node on which to create the device
6204
  @type instance: L{objects.Instance}
6205
  @param instance: the instance which owns the device
6206
  @type device: L{objects.Disk}
6207
  @param device: the device to create
6208
  @param info: the extra 'metadata' we should attach to the device
6209
      (this will be represented as a LVM tag)
6210
  @type force_open: boolean
6211
  @param force_open: this parameter will be passes to the
6212
      L{backend.BlockdevCreate} function where it specifies
6213
      whether we run on primary or not, and it affects both
6214
      the child assembly and the device own Open() execution
6215

6216
  """
6217
  lu.cfg.SetDiskID(device, node)
6218
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6219
                                       instance.name, force_open, info)
6220
  result.Raise("Can't create block device %s on"
6221
               " node %s for instance %s" % (device, node, instance.name))
6222
  if device.physical_id is None:
6223
    device.physical_id = result.payload
6224

    
6225

    
6226
def _GenerateUniqueNames(lu, exts):
6227
  """Generate a suitable LV name.
6228

6229
  This will generate a logical volume name for the given instance.
6230

6231
  """
6232
  results = []
6233
  for val in exts:
6234
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6235
    results.append("%s%s" % (new_id, val))
6236
  return results
6237

    
6238

    
6239
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6240
                         p_minor, s_minor):
6241
  """Generate a drbd8 device complete with its children.
6242

6243
  """
6244
  port = lu.cfg.AllocatePort()
6245
  vgname = lu.cfg.GetVGName()
6246
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6247
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6248
                          logical_id=(vgname, names[0]))
6249
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6250
                          logical_id=(vgname, names[1]))
6251
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6252
                          logical_id=(primary, secondary, port,
6253
                                      p_minor, s_minor,
6254
                                      shared_secret),
6255
                          children=[dev_data, dev_meta],
6256
                          iv_name=iv_name)
6257
  return drbd_dev
6258

    
6259

    
6260
def _GenerateDiskTemplate(lu, template_name,
6261
                          instance_name, primary_node,
6262
                          secondary_nodes, disk_info,
6263
                          file_storage_dir, file_driver,
6264
                          base_index):
6265
  """Generate the entire disk layout for a given template type.
6266

6267
  """
6268
  #TODO: compute space requirements
6269

    
6270
  vgname = lu.cfg.GetVGName()
6271
  disk_count = len(disk_info)
6272
  disks = []
6273
  if template_name == constants.DT_DISKLESS:
6274
    pass
6275
  elif template_name == constants.DT_PLAIN:
6276
    if len(secondary_nodes) != 0:
6277
      raise errors.ProgrammerError("Wrong template configuration")
6278

    
6279
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6280
                                      for i in range(disk_count)])
6281
    for idx, disk in enumerate(disk_info):
6282
      disk_index = idx + base_index
6283
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6284
                              logical_id=(vgname, names[idx]),
6285
                              iv_name="disk/%d" % disk_index,
6286
                              mode=disk["mode"])
6287
      disks.append(disk_dev)
6288
  elif template_name == constants.DT_DRBD8:
6289
    if len(secondary_nodes) != 1:
6290
      raise errors.ProgrammerError("Wrong template configuration")
6291
    remote_node = secondary_nodes[0]
6292
    minors = lu.cfg.AllocateDRBDMinor(
6293
      [primary_node, remote_node] * len(disk_info), instance_name)
6294

    
6295
    names = []
6296
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6297
                                               for i in range(disk_count)]):
6298
      names.append(lv_prefix + "_data")
6299
      names.append(lv_prefix + "_meta")
6300
    for idx, disk in enumerate(disk_info):
6301
      disk_index = idx + base_index
6302
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6303
                                      disk["size"], names[idx*2:idx*2+2],
6304
                                      "disk/%d" % disk_index,
6305
                                      minors[idx*2], minors[idx*2+1])
6306
      disk_dev.mode = disk["mode"]
6307
      disks.append(disk_dev)
6308
  elif template_name == constants.DT_FILE:
6309
    if len(secondary_nodes) != 0:
6310
      raise errors.ProgrammerError("Wrong template configuration")
6311

    
6312
    _RequireFileStorage()
6313

    
6314
    for idx, disk in enumerate(disk_info):
6315
      disk_index = idx + base_index
6316
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6317
                              iv_name="disk/%d" % disk_index,
6318
                              logical_id=(file_driver,
6319
                                          "%s/disk%d" % (file_storage_dir,
6320
                                                         disk_index)),
6321
                              mode=disk["mode"])
6322
      disks.append(disk_dev)
6323
  else:
6324
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6325
  return disks
6326

    
6327

    
6328
def _GetInstanceInfoText(instance):
6329
  """Compute that text that should be added to the disk's metadata.
6330

6331
  """
6332
  return "originstname+%s" % instance.name
6333

    
6334

    
6335
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6336
  """Create all disks for an instance.
6337

6338
  This abstracts away some work from AddInstance.
6339

6340
  @type lu: L{LogicalUnit}
6341
  @param lu: the logical unit on whose behalf we execute
6342
  @type instance: L{objects.Instance}
6343
  @param instance: the instance whose disks we should create
6344
  @type to_skip: list
6345
  @param to_skip: list of indices to skip
6346
  @type target_node: string
6347
  @param target_node: if passed, overrides the target node for creation
6348
  @rtype: boolean
6349
  @return: the success of the creation
6350

6351
  """
6352
  info = _GetInstanceInfoText(instance)
6353
  if target_node is None:
6354
    pnode = instance.primary_node
6355
    all_nodes = instance.all_nodes
6356
  else:
6357
    pnode = target_node
6358
    all_nodes = [pnode]
6359

    
6360
  if instance.disk_template == constants.DT_FILE:
6361
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6362
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6363

    
6364
    result.Raise("Failed to create directory '%s' on"
6365
                 " node %s" % (file_storage_dir, pnode))
6366

    
6367
  # Note: this needs to be kept in sync with adding of disks in
6368
  # LUSetInstanceParams
6369
  for idx, device in enumerate(instance.disks):
6370
    if to_skip and idx in to_skip:
6371
      continue
6372
    logging.info("Creating volume %s for instance %s",
6373
                 device.iv_name, instance.name)
6374
    #HARDCODE
6375
    for node in all_nodes:
6376
      f_create = node == pnode
6377
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6378

    
6379

    
6380
def _RemoveDisks(lu, instance, target_node=None):
6381
  """Remove all disks for an instance.
6382

6383
  This abstracts away some work from `AddInstance()` and
6384
  `RemoveInstance()`. Note that in case some of the devices couldn't
6385
  be removed, the removal will continue with the other ones (compare
6386
  with `_CreateDisks()`).
6387

6388
  @type lu: L{LogicalUnit}
6389
  @param lu: the logical unit on whose behalf we execute
6390
  @type instance: L{objects.Instance}
6391
  @param instance: the instance whose disks we should remove
6392
  @type target_node: string
6393
  @param target_node: used to override the node on which to remove the disks
6394
  @rtype: boolean
6395
  @return: the success of the removal
6396

6397
  """
6398
  logging.info("Removing block devices for instance %s", instance.name)
6399

    
6400
  all_result = True
6401
  for device in instance.disks:
6402
    if target_node:
6403
      edata = [(target_node, device)]
6404
    else:
6405
      edata = device.ComputeNodeTree(instance.primary_node)
6406
    for node, disk in edata:
6407
      lu.cfg.SetDiskID(disk, node)
6408
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6409
      if msg:
6410
        lu.LogWarning("Could not remove block device %s on node %s,"
6411
                      " continuing anyway: %s", device.iv_name, node, msg)
6412
        all_result = False
6413

    
6414
  if instance.disk_template == constants.DT_FILE:
6415
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6416
    if target_node:
6417
      tgt = target_node
6418
    else:
6419
      tgt = instance.primary_node
6420
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6421
    if result.fail_msg:
6422
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6423
                    file_storage_dir, instance.primary_node, result.fail_msg)
6424
      all_result = False
6425

    
6426
  return all_result
6427

    
6428

    
6429
def _ComputeDiskSize(disk_template, disks):
6430
  """Compute disk size requirements in the volume group
6431

6432
  """
6433
  # Required free disk space as a function of disk and swap space
6434
  req_size_dict = {
6435
    constants.DT_DISKLESS: None,
6436
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6437
    # 128 MB are added for drbd metadata for each disk
6438
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6439
    constants.DT_FILE: None,
6440
  }
6441

    
6442
  if disk_template not in req_size_dict:
6443
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6444
                                 " is unknown" %  disk_template)
6445

    
6446
  return req_size_dict[disk_template]
6447

    
6448

    
6449
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6450
  """Hypervisor parameter validation.
6451

6452
  This function abstract the hypervisor parameter validation to be
6453
  used in both instance create and instance modify.
6454

6455
  @type lu: L{LogicalUnit}
6456
  @param lu: the logical unit for which we check
6457
  @type nodenames: list
6458
  @param nodenames: the list of nodes on which we should check
6459
  @type hvname: string
6460
  @param hvname: the name of the hypervisor we should use
6461
  @type hvparams: dict
6462
  @param hvparams: the parameters which we need to check
6463
  @raise errors.OpPrereqError: if the parameters are not valid
6464

6465
  """
6466
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6467
                                                  hvname,
6468
                                                  hvparams)
6469
  for node in nodenames:
6470
    info = hvinfo[node]
6471
    if info.offline:
6472
      continue
6473
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6474

    
6475

    
6476
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6477
  """OS parameters validation.
6478

6479
  @type lu: L{LogicalUnit}
6480
  @param lu: the logical unit for which we check
6481
  @type required: boolean
6482
  @param required: whether the validation should fail if the OS is not
6483
      found
6484
  @type nodenames: list
6485
  @param nodenames: the list of nodes on which we should check
6486
  @type osname: string
6487
  @param osname: the name of the hypervisor we should use
6488
  @type osparams: dict
6489
  @param osparams: the parameters which we need to check
6490
  @raise errors.OpPrereqError: if the parameters are not valid
6491

6492
  """
6493
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6494
                                   [constants.OS_VALIDATE_PARAMETERS],
6495
                                   osparams)
6496
  for node, nres in result.items():
6497
    # we don't check for offline cases since this should be run only
6498
    # against the master node and/or an instance's nodes
6499
    nres.Raise("OS Parameters validation failed on node %s" % node)
6500
    if not nres.payload:
6501
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6502
                 osname, node)
6503

    
6504

    
6505
class LUCreateInstance(LogicalUnit):
6506
  """Create an instance.
6507

6508
  """
6509
  HPATH = "instance-add"
6510
  HTYPE = constants.HTYPE_INSTANCE
6511
  _OP_PARAMS = [
6512
    _PInstanceName,
6513
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6514
    ("start", True, _TBool),
6515
    ("wait_for_sync", True, _TBool),
6516
    ("ip_check", True, _TBool),
6517
    ("name_check", True, _TBool),
6518
    ("disks", _NoDefault, _TListOf(_TDict)),
6519
    ("nics", _NoDefault, _TListOf(_TDict)),
6520
    ("hvparams", _EmptyDict, _TDict),
6521
    ("beparams", _EmptyDict, _TDict),
6522
    ("osparams", _EmptyDict, _TDict),
6523
    ("no_install", None, _TMaybeBool),
6524
    ("os_type", None, _TMaybeString),
6525
    ("force_variant", False, _TBool),
6526
    ("source_handshake", None, _TOr(_TList, _TNone)),
6527
    ("source_x509_ca", None, _TMaybeString),
6528
    ("source_instance_name", None, _TMaybeString),
6529
    ("src_node", None, _TMaybeString),
6530
    ("src_path", None, _TMaybeString),
6531
    ("pnode", None, _TMaybeString),
6532
    ("snode", None, _TMaybeString),
6533
    ("iallocator", None, _TMaybeString),
6534
    ("hypervisor", None, _TMaybeString),
6535
    ("disk_template", _NoDefault, _CheckDiskTemplate),
6536
    ("identify_defaults", False, _TBool),
6537
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6538
    ("file_storage_dir", None, _TMaybeString),
6539
    ("dry_run", False, _TBool),
6540
    ]
6541
  REQ_BGL = False
6542

    
6543
  def CheckArguments(self):
6544
    """Check arguments.
6545

6546
    """
6547
    # do not require name_check to ease forward/backward compatibility
6548
    # for tools
6549
    if self.op.no_install and self.op.start:
6550
      self.LogInfo("No-installation mode selected, disabling startup")
6551
      self.op.start = False
6552
    # validate/normalize the instance name
6553
    self.op.instance_name = \
6554
      netutils.HostInfo.NormalizeName(self.op.instance_name)
6555

    
6556
    if self.op.ip_check and not self.op.name_check:
6557
      # TODO: make the ip check more flexible and not depend on the name check
6558
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6559
                                 errors.ECODE_INVAL)
6560

    
6561
    # check nics' parameter names
6562
    for nic in self.op.nics:
6563
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6564

    
6565
    # check disks. parameter names and consistent adopt/no-adopt strategy
6566
    has_adopt = has_no_adopt = False
6567
    for disk in self.op.disks:
6568
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6569
      if "adopt" in disk:
6570
        has_adopt = True
6571
      else:
6572
        has_no_adopt = True
6573
    if has_adopt and has_no_adopt:
6574
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6575
                                 errors.ECODE_INVAL)
6576
    if has_adopt:
6577
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6578
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6579
                                   " '%s' disk template" %
6580
                                   self.op.disk_template,
6581
                                   errors.ECODE_INVAL)
6582
      if self.op.iallocator is not None:
6583
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6584
                                   " iallocator script", errors.ECODE_INVAL)
6585
      if self.op.mode == constants.INSTANCE_IMPORT:
6586
        raise errors.OpPrereqError("Disk adoption not allowed for"
6587
                                   " instance import", errors.ECODE_INVAL)
6588

    
6589
    self.adopt_disks = has_adopt
6590

    
6591
    # instance name verification
6592
    if self.op.name_check:
6593
      self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6594
      self.op.instance_name = self.hostname1.name
6595
      # used in CheckPrereq for ip ping check
6596
      self.check_ip = self.hostname1.ip
6597
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6598
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6599
                                 errors.ECODE_INVAL)
6600
    else:
6601
      self.check_ip = None
6602

    
6603
    # file storage checks
6604
    if (self.op.file_driver and
6605
        not self.op.file_driver in constants.FILE_DRIVER):
6606
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6607
                                 self.op.file_driver, errors.ECODE_INVAL)
6608

    
6609
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6610
      raise errors.OpPrereqError("File storage directory path not absolute",
6611
                                 errors.ECODE_INVAL)
6612

    
6613
    ### Node/iallocator related checks
6614
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6615

    
6616
    self._cds = _GetClusterDomainSecret()
6617

    
6618
    if self.op.mode == constants.INSTANCE_IMPORT:
6619
      # On import force_variant must be True, because if we forced it at
6620
      # initial install, our only chance when importing it back is that it
6621
      # works again!
6622
      self.op.force_variant = True
6623

    
6624
      if self.op.no_install:
6625
        self.LogInfo("No-installation mode has no effect during import")
6626

    
6627
    elif self.op.mode == constants.INSTANCE_CREATE:
6628
      if self.op.os_type is None:
6629
        raise errors.OpPrereqError("No guest OS specified",
6630
                                   errors.ECODE_INVAL)
6631
      if self.op.disk_template is None:
6632
        raise errors.OpPrereqError("No disk template specified",
6633
                                   errors.ECODE_INVAL)
6634

    
6635
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6636
      # Check handshake to ensure both clusters have the same domain secret
6637
      src_handshake = self.op.source_handshake
6638
      if not src_handshake:
6639
        raise errors.OpPrereqError("Missing source handshake",
6640
                                   errors.ECODE_INVAL)
6641

    
6642
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6643
                                                           src_handshake)
6644
      if errmsg:
6645
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6646
                                   errors.ECODE_INVAL)
6647

    
6648
      # Load and check source CA
6649
      self.source_x509_ca_pem = self.op.source_x509_ca
6650
      if not self.source_x509_ca_pem:
6651
        raise errors.OpPrereqError("Missing source X509 CA",
6652
                                   errors.ECODE_INVAL)
6653

    
6654
      try:
6655
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6656
                                                    self._cds)
6657
      except OpenSSL.crypto.Error, err:
6658
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6659
                                   (err, ), errors.ECODE_INVAL)
6660

    
6661
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6662
      if errcode is not None:
6663
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6664
                                   errors.ECODE_INVAL)
6665

    
6666
      self.source_x509_ca = cert
6667

    
6668
      src_instance_name = self.op.source_instance_name
6669
      if not src_instance_name:
6670
        raise errors.OpPrereqError("Missing source instance name",
6671
                                   errors.ECODE_INVAL)
6672

    
6673
      norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6674
      self.source_instance_name = netutils.GetHostInfo(norm_name).name
6675

    
6676
    else:
6677
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6678
                                 self.op.mode, errors.ECODE_INVAL)
6679

    
6680
  def ExpandNames(self):
6681
    """ExpandNames for CreateInstance.
6682

6683
    Figure out the right locks for instance creation.
6684

6685
    """
6686
    self.needed_locks = {}
6687

    
6688
    instance_name = self.op.instance_name
6689
    # this is just a preventive check, but someone might still add this
6690
    # instance in the meantime, and creation will fail at lock-add time
6691
    if instance_name in self.cfg.GetInstanceList():
6692
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6693
                                 instance_name, errors.ECODE_EXISTS)
6694

    
6695
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6696

    
6697
    if self.op.iallocator:
6698
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6699
    else:
6700
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6701
      nodelist = [self.op.pnode]
6702
      if self.op.snode is not None:
6703
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6704
        nodelist.append(self.op.snode)
6705
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6706

    
6707
    # in case of import lock the source node too
6708
    if self.op.mode == constants.INSTANCE_IMPORT:
6709
      src_node = self.op.src_node
6710
      src_path = self.op.src_path
6711

    
6712
      if src_path is None:
6713
        self.op.src_path = src_path = self.op.instance_name
6714

    
6715
      if src_node is None:
6716
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6717
        self.op.src_node = None
6718
        if os.path.isabs(src_path):
6719
          raise errors.OpPrereqError("Importing an instance from an absolute"
6720
                                     " path requires a source node option.",
6721
                                     errors.ECODE_INVAL)
6722
      else:
6723
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6724
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6725
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6726
        if not os.path.isabs(src_path):
6727
          self.op.src_path = src_path = \
6728
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6729

    
6730
  def _RunAllocator(self):
6731
    """Run the allocator based on input opcode.
6732

6733
    """
6734
    nics = [n.ToDict() for n in self.nics]
6735
    ial = IAllocator(self.cfg, self.rpc,
6736
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6737
                     name=self.op.instance_name,
6738
                     disk_template=self.op.disk_template,
6739
                     tags=[],
6740
                     os=self.op.os_type,
6741
                     vcpus=self.be_full[constants.BE_VCPUS],
6742
                     mem_size=self.be_full[constants.BE_MEMORY],
6743
                     disks=self.disks,
6744
                     nics=nics,
6745
                     hypervisor=self.op.hypervisor,
6746
                     )
6747

    
6748
    ial.Run(self.op.iallocator)
6749

    
6750
    if not ial.success:
6751
      raise errors.OpPrereqError("Can't compute nodes using"
6752
                                 " iallocator '%s': %s" %
6753
                                 (self.op.iallocator, ial.info),
6754
                                 errors.ECODE_NORES)
6755
    if len(ial.result) != ial.required_nodes:
6756
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6757
                                 " of nodes (%s), required %s" %
6758
                                 (self.op.iallocator, len(ial.result),
6759
                                  ial.required_nodes), errors.ECODE_FAULT)
6760
    self.op.pnode = ial.result[0]
6761
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6762
                 self.op.instance_name, self.op.iallocator,
6763
                 utils.CommaJoin(ial.result))
6764
    if ial.required_nodes == 2:
6765
      self.op.snode = ial.result[1]
6766

    
6767
  def BuildHooksEnv(self):
6768
    """Build hooks env.
6769

6770
    This runs on master, primary and secondary nodes of the instance.
6771

6772
    """
6773
    env = {
6774
      "ADD_MODE": self.op.mode,
6775
      }
6776
    if self.op.mode == constants.INSTANCE_IMPORT:
6777
      env["SRC_NODE"] = self.op.src_node
6778
      env["SRC_PATH"] = self.op.src_path
6779
      env["SRC_IMAGES"] = self.src_images
6780

    
6781
    env.update(_BuildInstanceHookEnv(
6782
      name=self.op.instance_name,
6783
      primary_node=self.op.pnode,
6784
      secondary_nodes=self.secondaries,
6785
      status=self.op.start,
6786
      os_type=self.op.os_type,
6787
      memory=self.be_full[constants.BE_MEMORY],
6788
      vcpus=self.be_full[constants.BE_VCPUS],
6789
      nics=_NICListToTuple(self, self.nics),
6790
      disk_template=self.op.disk_template,
6791
      disks=[(d["size"], d["mode"]) for d in self.disks],
6792
      bep=self.be_full,
6793
      hvp=self.hv_full,
6794
      hypervisor_name=self.op.hypervisor,
6795
    ))
6796

    
6797
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6798
          self.secondaries)
6799
    return env, nl, nl
6800

    
6801
  def _ReadExportInfo(self):
6802
    """Reads the export information from disk.
6803

6804
    It will override the opcode source node and path with the actual
6805
    information, if these two were not specified before.
6806

6807
    @return: the export information
6808

6809
    """
6810
    assert self.op.mode == constants.INSTANCE_IMPORT
6811

    
6812
    src_node = self.op.src_node
6813
    src_path = self.op.src_path
6814

    
6815
    if src_node is None:
6816
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6817
      exp_list = self.rpc.call_export_list(locked_nodes)
6818
      found = False
6819
      for node in exp_list:
6820
        if exp_list[node].fail_msg:
6821
          continue
6822
        if src_path in exp_list[node].payload:
6823
          found = True
6824
          self.op.src_node = src_node = node
6825
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6826
                                                       src_path)
6827
          break
6828
      if not found:
6829
        raise errors.OpPrereqError("No export found for relative path %s" %
6830
                                    src_path, errors.ECODE_INVAL)
6831

    
6832
    _CheckNodeOnline(self, src_node)
6833
    result = self.rpc.call_export_info(src_node, src_path)
6834
    result.Raise("No export or invalid export found in dir %s" % src_path)
6835

    
6836
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6837
    if not export_info.has_section(constants.INISECT_EXP):
6838
      raise errors.ProgrammerError("Corrupted export config",
6839
                                   errors.ECODE_ENVIRON)
6840

    
6841
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6842
    if (int(ei_version) != constants.EXPORT_VERSION):
6843
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6844
                                 (ei_version, constants.EXPORT_VERSION),
6845
                                 errors.ECODE_ENVIRON)
6846
    return export_info
6847

    
6848
  def _ReadExportParams(self, einfo):
6849
    """Use export parameters as defaults.
6850

6851
    In case the opcode doesn't specify (as in override) some instance
6852
    parameters, then try to use them from the export information, if
6853
    that declares them.
6854

6855
    """
6856
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6857

    
6858
    if self.op.disk_template is None:
6859
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6860
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6861
                                          "disk_template")
6862
      else:
6863
        raise errors.OpPrereqError("No disk template specified and the export"
6864
                                   " is missing the disk_template information",
6865
                                   errors.ECODE_INVAL)
6866

    
6867
    if not self.op.disks:
6868
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6869
        disks = []
6870
        # TODO: import the disk iv_name too
6871
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6872
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6873
          disks.append({"size": disk_sz})
6874
        self.op.disks = disks
6875
      else:
6876
        raise errors.OpPrereqError("No disk info specified and the export"
6877
                                   " is missing the disk information",
6878
                                   errors.ECODE_INVAL)
6879

    
6880
    if (not self.op.nics and
6881
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6882
      nics = []
6883
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6884
        ndict = {}
6885
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6886
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6887
          ndict[name] = v
6888
        nics.append(ndict)
6889
      self.op.nics = nics
6890

    
6891
    if (self.op.hypervisor is None and
6892
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6893
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6894
    if einfo.has_section(constants.INISECT_HYP):
6895
      # use the export parameters but do not override the ones
6896
      # specified by the user
6897
      for name, value in einfo.items(constants.INISECT_HYP):
6898
        if name not in self.op.hvparams:
6899
          self.op.hvparams[name] = value
6900

    
6901
    if einfo.has_section(constants.INISECT_BEP):
6902
      # use the parameters, without overriding
6903
      for name, value in einfo.items(constants.INISECT_BEP):
6904
        if name not in self.op.beparams:
6905
          self.op.beparams[name] = value
6906
    else:
6907
      # try to read the parameters old style, from the main section
6908
      for name in constants.BES_PARAMETERS:
6909
        if (name not in self.op.beparams and
6910
            einfo.has_option(constants.INISECT_INS, name)):
6911
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6912

    
6913
    if einfo.has_section(constants.INISECT_OSP):
6914
      # use the parameters, without overriding
6915
      for name, value in einfo.items(constants.INISECT_OSP):
6916
        if name not in self.op.osparams:
6917
          self.op.osparams[name] = value
6918

    
6919
  def _RevertToDefaults(self, cluster):
6920
    """Revert the instance parameters to the default values.
6921

6922
    """
6923
    # hvparams
6924
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6925
    for name in self.op.hvparams.keys():
6926
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6927
        del self.op.hvparams[name]
6928
    # beparams
6929
    be_defs = cluster.SimpleFillBE({})
6930
    for name in self.op.beparams.keys():
6931
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6932
        del self.op.beparams[name]
6933
    # nic params
6934
    nic_defs = cluster.SimpleFillNIC({})
6935
    for nic in self.op.nics:
6936
      for name in constants.NICS_PARAMETERS:
6937
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6938
          del nic[name]
6939
    # osparams
6940
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6941
    for name in self.op.osparams.keys():
6942
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6943
        del self.op.osparams[name]
6944

    
6945
  def CheckPrereq(self):
6946
    """Check prerequisites.
6947

6948
    """
6949
    if self.op.mode == constants.INSTANCE_IMPORT:
6950
      export_info = self._ReadExportInfo()
6951
      self._ReadExportParams(export_info)
6952

    
6953
    _CheckDiskTemplate(self.op.disk_template)
6954

    
6955
    if (not self.cfg.GetVGName() and
6956
        self.op.disk_template not in constants.DTS_NOT_LVM):
6957
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6958
                                 " instances", errors.ECODE_STATE)
6959

    
6960
    if self.op.hypervisor is None:
6961
      self.op.hypervisor = self.cfg.GetHypervisorType()
6962

    
6963
    cluster = self.cfg.GetClusterInfo()
6964
    enabled_hvs = cluster.enabled_hypervisors
6965
    if self.op.hypervisor not in enabled_hvs:
6966
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6967
                                 " cluster (%s)" % (self.op.hypervisor,
6968
                                  ",".join(enabled_hvs)),
6969
                                 errors.ECODE_STATE)
6970

    
6971
    # check hypervisor parameter syntax (locally)
6972
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6973
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6974
                                      self.op.hvparams)
6975
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6976
    hv_type.CheckParameterSyntax(filled_hvp)
6977
    self.hv_full = filled_hvp
6978
    # check that we don't specify global parameters on an instance
6979
    _CheckGlobalHvParams(self.op.hvparams)
6980

    
6981
    # fill and remember the beparams dict
6982
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6983
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6984

    
6985
    # build os parameters
6986
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6987

    
6988
    # now that hvp/bep are in final format, let's reset to defaults,
6989
    # if told to do so
6990
    if self.op.identify_defaults:
6991
      self._RevertToDefaults(cluster)
6992

    
6993
    # NIC buildup
6994
    self.nics = []
6995
    for idx, nic in enumerate(self.op.nics):
6996
      nic_mode_req = nic.get("mode", None)
6997
      nic_mode = nic_mode_req
6998
      if nic_mode is None:
6999
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7000

    
7001
      # in routed mode, for the first nic, the default ip is 'auto'
7002
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7003
        default_ip_mode = constants.VALUE_AUTO
7004
      else:
7005
        default_ip_mode = constants.VALUE_NONE
7006

    
7007
      # ip validity checks
7008
      ip = nic.get("ip", default_ip_mode)
7009
      if ip is None or ip.lower() == constants.VALUE_NONE:
7010
        nic_ip = None
7011
      elif ip.lower() == constants.VALUE_AUTO:
7012
        if not self.op.name_check:
7013
          raise errors.OpPrereqError("IP address set to auto but name checks"
7014
                                     " have been skipped. Aborting.",
7015
                                     errors.ECODE_INVAL)
7016
        nic_ip = self.hostname1.ip
7017
      else:
7018
        if not netutils.IsValidIP4(ip):
7019
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7020
                                     " like a valid IP" % ip,
7021
                                     errors.ECODE_INVAL)
7022
        nic_ip = ip
7023

    
7024
      # TODO: check the ip address for uniqueness
7025
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7026
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7027
                                   errors.ECODE_INVAL)
7028

    
7029
      # MAC address verification
7030
      mac = nic.get("mac", constants.VALUE_AUTO)
7031
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7032
        mac = utils.NormalizeAndValidateMac(mac)
7033

    
7034
        try:
7035
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7036
        except errors.ReservationError:
7037
          raise errors.OpPrereqError("MAC address %s already in use"
7038
                                     " in cluster" % mac,
7039
                                     errors.ECODE_NOTUNIQUE)
7040

    
7041
      # bridge verification
7042
      bridge = nic.get("bridge", None)
7043
      link = nic.get("link", None)
7044
      if bridge and link:
7045
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7046
                                   " at the same time", errors.ECODE_INVAL)
7047
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7048
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7049
                                   errors.ECODE_INVAL)
7050
      elif bridge:
7051
        link = bridge
7052

    
7053
      nicparams = {}
7054
      if nic_mode_req:
7055
        nicparams[constants.NIC_MODE] = nic_mode_req
7056
      if link:
7057
        nicparams[constants.NIC_LINK] = link
7058

    
7059
      check_params = cluster.SimpleFillNIC(nicparams)
7060
      objects.NIC.CheckParameterSyntax(check_params)
7061
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7062

    
7063
    # disk checks/pre-build
7064
    self.disks = []
7065
    for disk in self.op.disks:
7066
      mode = disk.get("mode", constants.DISK_RDWR)
7067
      if mode not in constants.DISK_ACCESS_SET:
7068
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7069
                                   mode, errors.ECODE_INVAL)
7070
      size = disk.get("size", None)
7071
      if size is None:
7072
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7073
      try:
7074
        size = int(size)
7075
      except (TypeError, ValueError):
7076
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7077
                                   errors.ECODE_INVAL)
7078
      new_disk = {"size": size, "mode": mode}
7079
      if "adopt" in disk:
7080
        new_disk["adopt"] = disk["adopt"]
7081
      self.disks.append(new_disk)
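      # each entry of self.disks is now a dict of the form
      # {"size": <int>, "mode": <access mode>}, plus an optional "adopt" key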
7082

    
7083
    if self.op.mode == constants.INSTANCE_IMPORT:
7084

    
7085
      # Check that the new instance doesn't have less disks than the export
7086
      instance_disks = len(self.disks)
7087
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7088
      if instance_disks < export_disks:
7089
        raise errors.OpPrereqError("Not enough disks to import."
7090
                                   " (instance: %d, export: %d)" %
7091
                                   (instance_disks, export_disks),
7092
                                   errors.ECODE_INVAL)
7093

    
7094
      disk_images = []
7095
      for idx in range(export_disks):
7096
        option = 'disk%d_dump' % idx
7097
        if export_info.has_option(constants.INISECT_INS, option):
7098
          # FIXME: are the old os-es, disk sizes, etc. useful?
7099
          export_name = export_info.get(constants.INISECT_INS, option)
7100
          image = utils.PathJoin(self.op.src_path, export_name)
7101
          disk_images.append(image)
7102
        else:
7103
          disk_images.append(False)
7104

    
7105
      self.src_images = disk_images
7106

    
7107
      old_name = export_info.get(constants.INISECT_INS, 'name')
7108
      try:
7109
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7110
      except (TypeError, ValueError), err:
7111
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7112
                                   " an integer: %s" % str(err),
7113
                                   errors.ECODE_STATE)
7114
      if self.op.instance_name == old_name:
7115
        for idx, nic in enumerate(self.nics):
7116
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7117
            nic_mac_ini = 'nic%d_mac' % idx
7118
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7119

    
7120
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7121

    
7122
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7123
    if self.op.ip_check:
7124
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7125
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7126
                                   (self.check_ip, self.op.instance_name),
7127
                                   errors.ECODE_NOTUNIQUE)
7128

    
7129
    #### mac address generation
7130
    # By generating here the mac address both the allocator and the hooks get
7131
    # the real final mac address rather than the 'auto' or 'generate' value.
7132
    # There is a race condition between the generation and the instance object
7133
    # creation, which means that we know the mac is valid now, but we're not
7134
    # sure it will be when we actually add the instance. If things go bad,
7135
    # adding the instance will abort because of a duplicate mac, and the
7136
    # creation job will fail.
7137
    for nic in self.nics:
7138
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7139
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7140

    
7141
    #### allocator run
7142

    
7143
    if self.op.iallocator is not None:
7144
      self._RunAllocator()
7145

    
7146
    #### node related checks
7147

    
7148
    # check primary node
7149
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7150
    assert self.pnode is not None, \
7151
      "Cannot retrieve locked node %s" % self.op.pnode
7152
    if pnode.offline:
7153
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7154
                                 pnode.name, errors.ECODE_STATE)
7155
    if pnode.drained:
7156
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7157
                                 pnode.name, errors.ECODE_STATE)
7158

    
7159
    self.secondaries = []
7160

    
7161
    # mirror node verification
7162
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7163
      if self.op.snode is None:
7164
        raise errors.OpPrereqError("The networked disk templates need"
7165
                                   " a mirror node", errors.ECODE_INVAL)
7166
      if self.op.snode == pnode.name:
7167
        raise errors.OpPrereqError("The secondary node cannot be the"
7168
                                   " primary node.", errors.ECODE_INVAL)
7169
      _CheckNodeOnline(self, self.op.snode)
7170
      _CheckNodeNotDrained(self, self.op.snode)
7171
      self.secondaries.append(self.op.snode)
7172

    
7173
    nodenames = [pnode.name] + self.secondaries
7174

    
7175
    req_size = _ComputeDiskSize(self.op.disk_template,
7176
                                self.disks)
7177

    
7178
    # Check lv size requirements, if not adopting
7179
    if req_size is not None and not self.adopt_disks:
7180
      _CheckNodesFreeDisk(self, nodenames, req_size)
7181

    
7182
    if self.adopt_disks: # instead, we must check the adoption data
7183
      all_lvs = set([i["adopt"] for i in self.disks])
7184
      if len(all_lvs) != len(self.disks):
7185
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7186
                                   errors.ECODE_INVAL)
7187
      for lv_name in all_lvs:
7188
        try:
7189
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7190
        except errors.ReservationError:
7191
          raise errors.OpPrereqError("LV named %s used by another instance" %
7192
                                     lv_name, errors.ECODE_NOTUNIQUE)
7193

    
7194
      node_lvs = self.rpc.call_lv_list([pnode.name],
7195
                                       self.cfg.GetVGName())[pnode.name]
7196
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7197
      node_lvs = node_lvs.payload
7198
      delta = all_lvs.difference(node_lvs.keys())
7199
      if delta:
7200
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7201
                                   utils.CommaJoin(delta),
7202
                                   errors.ECODE_INVAL)
7203
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
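      # (an LV whose third status field is set is reported as online/in use
      #  and therefore cannot be adopted)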
7204
      if online_lvs:
7205
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7206
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7207
                                   errors.ECODE_STATE)
7208
      # update the size of disk based on what is found
7209
      for dsk in self.disks:
7210
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7211

    
7212
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7213

    
7214
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7215
    # check OS parameters (remotely)
7216
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7217

    
7218
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7219

    
7220
    # memory check on primary node
7221
    if self.op.start:
7222
      _CheckNodeFreeMemory(self, self.pnode.name,
7223
                           "creating instance %s" % self.op.instance_name,
7224
                           self.be_full[constants.BE_MEMORY],
7225
                           self.op.hypervisor)
7226

    
7227
    self.dry_run_result = list(nodenames)
7228

    
7229
  def Exec(self, feedback_fn):
7230
    """Create and add the instance to the cluster.
7231

7232
    """
7233
    instance = self.op.instance_name
7234
    pnode_name = self.pnode.name
7235

    
7236
    ht_kind = self.op.hypervisor
7237
    if ht_kind in constants.HTS_REQ_PORT:
7238
      network_port = self.cfg.AllocatePort()
7239
    else:
7240
      network_port = None
7241

    
7242
    if constants.ENABLE_FILE_STORAGE:
7243
      # this is needed because os.path.join does not accept None arguments
7244
      if self.op.file_storage_dir is None:
7245
        string_file_storage_dir = ""
7246
      else:
7247
        string_file_storage_dir = self.op.file_storage_dir
7248

    
7249
      # build the full file storage dir path
7250
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7251
                                        string_file_storage_dir, instance)
7252
    else:
7253
      file_storage_dir = ""
7254

    
7255
    disks = _GenerateDiskTemplate(self,
7256
                                  self.op.disk_template,
7257
                                  instance, pnode_name,
7258
                                  self.secondaries,
7259
                                  self.disks,
7260
                                  file_storage_dir,
7261
                                  self.op.file_driver,
7262
                                  0)
7263

    
7264
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7265
                            primary_node=pnode_name,
7266
                            nics=self.nics, disks=disks,
7267
                            disk_template=self.op.disk_template,
7268
                            admin_up=False,
7269
                            network_port=network_port,
7270
                            beparams=self.op.beparams,
7271
                            hvparams=self.op.hvparams,
7272
                            hypervisor=self.op.hypervisor,
7273
                            osparams=self.op.osparams,
7274
                            )
7275

    
7276
    if self.adopt_disks:
7277
      # rename LVs to the newly-generated names; we need to construct
7278
      # 'fake' LV disks with the old data, plus the new unique_id
7279
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7280
      rename_to = []
7281
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7282
        rename_to.append(t_dsk.logical_id)
7283
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7284
        self.cfg.SetDiskID(t_dsk, pnode_name)
7285
      result = self.rpc.call_blockdev_rename(pnode_name,
7286
                                             zip(tmp_disks, rename_to))
7287
      result.Raise("Failed to rename adoped LVs")
7288
    else:
7289
      feedback_fn("* creating instance disks...")
7290
      try:
7291
        _CreateDisks(self, iobj)
7292
      except errors.OpExecError:
7293
        self.LogWarning("Device creation failed, reverting...")
7294
        try:
7295
          _RemoveDisks(self, iobj)
7296
        finally:
7297
          self.cfg.ReleaseDRBDMinors(instance)
7298
          raise
7299

    
7300
    feedback_fn("adding instance %s to cluster config" % instance)
7301

    
7302
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7303

    
7304
    # Declare that we don't want to remove the instance lock anymore, as we've
7305
    # added the instance to the config
7306
    del self.remove_locks[locking.LEVEL_INSTANCE]
7307
    # Unlock all the nodes
7308
    if self.op.mode == constants.INSTANCE_IMPORT:
7309
      nodes_keep = [self.op.src_node]
7310
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7311
                       if node != self.op.src_node]
7312
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7313
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7314
    else:
7315
      self.context.glm.release(locking.LEVEL_NODE)
7316
      del self.acquired_locks[locking.LEVEL_NODE]
7317

    
7318
    if self.op.wait_for_sync:
7319
      disk_abort = not _WaitForSync(self, iobj)
7320
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7321
      # make sure the disks are not degraded (still sync-ing is ok)
7322
      time.sleep(15)
7323
      feedback_fn("* checking mirrors status")
7324
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7325
    else:
7326
      disk_abort = False
7327

    
7328
    if disk_abort:
7329
      _RemoveDisks(self, iobj)
7330
      self.cfg.RemoveInstance(iobj.name)
7331
      # Make sure the instance lock gets removed
7332
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7333
      raise errors.OpExecError("There are some degraded disks for"
7334
                               " this instance")
7335

    
7336
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7337
      if self.op.mode == constants.INSTANCE_CREATE:
7338
        if not self.op.no_install:
7339
          feedback_fn("* running the instance OS create scripts...")
7340
          # FIXME: pass debug option from opcode to backend
7341
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7342
                                                 self.op.debug_level)
7343
          result.Raise("Could not add os for instance %s"
7344
                       " on node %s" % (instance, pnode_name))
7345

    
7346
      elif self.op.mode == constants.INSTANCE_IMPORT:
7347
        feedback_fn("* running the instance OS import scripts...")
7348

    
7349
        transfers = []
7350

    
7351
        for idx, image in enumerate(self.src_images):
7352
          if not image:
7353
            continue
7354

    
7355
          # FIXME: pass debug option from opcode to backend
7356
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7357
                                             constants.IEIO_FILE, (image, ),
7358
                                             constants.IEIO_SCRIPT,
7359
                                             (iobj.disks[idx], idx),
7360
                                             None)
7361
          transfers.append(dt)
7362

    
7363
        import_result = \
7364
          masterd.instance.TransferInstanceData(self, feedback_fn,
7365
                                                self.op.src_node, pnode_name,
7366
                                                self.pnode.secondary_ip,
7367
                                                iobj, transfers)
7368
        if not compat.all(import_result):
7369
          self.LogWarning("Some disks for instance %s on node %s were not"
7370
                          " imported successfully" % (instance, pnode_name))
7371

    
7372
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7373
        feedback_fn("* preparing remote import...")
7374
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7375
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7376

    
7377
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7378
                                                     self.source_x509_ca,
7379
                                                     self._cds, timeouts)
7380
        if not compat.all(disk_results):
7381
          # TODO: Should the instance still be started, even if some disks
7382
          # failed to import (valid for local imports, too)?
7383
          self.LogWarning("Some disks for instance %s on node %s were not"
7384
                          " imported successfully" % (instance, pnode_name))
7385

    
7386
        # Run rename script on newly imported instance
7387
        assert iobj.name == instance
7388
        feedback_fn("Running rename script for %s" % instance)
7389
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7390
                                                   self.source_instance_name,
7391
                                                   self.op.debug_level)
7392
        if result.fail_msg:
7393
          self.LogWarning("Failed to run rename script for %s on node"
7394
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7395

    
7396
      else:
7397
        # also checked in the prereq part
7398
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7399
                                     % self.op.mode)
7400

    
7401
    if self.op.start:
7402
      iobj.admin_up = True
7403
      self.cfg.Update(iobj, feedback_fn)
7404
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7405
      feedback_fn("* starting instance...")
7406
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7407
      result.Raise("Could not start instance")
7408

    
7409
    return list(iobj.all_nodes)
7410

    
7411

    
7412
class LUConnectConsole(NoHooksLU):
7413
  """Connect to an instance's console.
7414

7415
  This is somewhat special in that it returns the command line that
7416
  you need to run on the master node in order to connect to the
7417
  console.
7418

7419
  """
7420
  _OP_PARAMS = [
7421
    _PInstanceName
7422
    ]
7423
  REQ_BGL = False
7424

    
7425
  def ExpandNames(self):
7426
    self._ExpandAndLockInstance()
7427

    
7428
  def CheckPrereq(self):
7429
    """Check prerequisites.
7430

7431
    This checks that the instance is in the cluster.
7432

7433
    """
7434
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7435
    assert self.instance is not None, \
7436
      "Cannot retrieve locked instance %s" % self.op.instance_name
7437
    _CheckNodeOnline(self, self.instance.primary_node)
7438

    
7439
  def Exec(self, feedback_fn):
7440
    """Connect to the console of an instance
7441

7442
    """
7443
    instance = self.instance
7444
    node = instance.primary_node
7445

    
7446
    node_insts = self.rpc.call_instance_list([node],
7447
                                             [instance.hypervisor])[node]
7448
    node_insts.Raise("Can't get node information from %s" % node)
7449

    
7450
    if instance.name not in node_insts.payload:
7451
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7452

    
7453
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7454

    
7455
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7456
    cluster = self.cfg.GetClusterInfo()
7457
    # beparams and hvparams are passed separately, to avoid editing the
7458
    # instance and then saving the defaults in the instance itself.
7459
    hvparams = cluster.FillHV(instance)
7460
    beparams = cluster.FillBE(instance)
7461
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7462

    
7463
    # build ssh cmdline
7464
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7465

    
7466

    
7467
class LUReplaceDisks(LogicalUnit):
7468
  """Replace the disks of an instance.
7469

7470
  """
7471
  HPATH = "mirrors-replace"
7472
  HTYPE = constants.HTYPE_INSTANCE
7473
  _OP_PARAMS = [
7474
    _PInstanceName,
7475
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7476
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7477
    ("remote_node", None, _TMaybeString),
7478
    ("iallocator", None, _TMaybeString),
7479
    ("early_release", False, _TBool),
7480
    ]
7481
  REQ_BGL = False
7482

    
7483
  def CheckArguments(self):
7484
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7485
                                  self.op.iallocator)
7486

    
7487
  def ExpandNames(self):
7488
    self._ExpandAndLockInstance()
7489

    
7490
    if self.op.iallocator is not None:
7491
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7492

    
7493
    elif self.op.remote_node is not None:
7494
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7495
      self.op.remote_node = remote_node
7496

    
7497
      # Warning: do not remove the locking of the new secondary here
7498
      # unless DRBD8.AddChildren is changed to work in parallel;
7499
      # currently it doesn't since parallel invocations of
7500
      # FindUnusedMinor will conflict
7501
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7502
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7503

    
7504
    else:
7505
      self.needed_locks[locking.LEVEL_NODE] = []
7506
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7507

    
7508
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7509
                                   self.op.iallocator, self.op.remote_node,
7510
                                   self.op.disks, False, self.op.early_release)
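    # (the False passed above is the delay_iallocator argument of
    # TLReplaceDisks; see its constructor further down)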
7511

    
7512
    self.tasklets = [self.replacer]
7513

    
7514
  def DeclareLocks(self, level):
7515
    # If we're not already locking all nodes in the set we have to declare the
7516
    # instance's primary/secondary nodes.
7517
    if (level == locking.LEVEL_NODE and
7518
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7519
      self._LockInstancesNodes()
7520

    
7521
  def BuildHooksEnv(self):
7522
    """Build hooks env.
7523

7524
    This runs on the master, the primary and all the secondaries.
7525

7526
    """
7527
    instance = self.replacer.instance
7528
    env = {
7529
      "MODE": self.op.mode,
7530
      "NEW_SECONDARY": self.op.remote_node,
7531
      "OLD_SECONDARY": instance.secondary_nodes[0],
7532
      }
7533
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7534
    nl = [
7535
      self.cfg.GetMasterNode(),
7536
      instance.primary_node,
7537
      ]
7538
    if self.op.remote_node is not None:
7539
      nl.append(self.op.remote_node)
7540
    return env, nl, nl
7541

    
7542

    
7543
class TLReplaceDisks(Tasklet):
7544
  """Replaces disks for an instance.
7545

7546
  Note: Locking is not within the scope of this class.
7547

7548
  """
7549
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7550
               disks, delay_iallocator, early_release):
7551
    """Initializes this class.
7552

7553
    """
7554
    Tasklet.__init__(self, lu)
7555

    
7556
    # Parameters
7557
    self.instance_name = instance_name
7558
    self.mode = mode
7559
    self.iallocator_name = iallocator_name
7560
    self.remote_node = remote_node
7561
    self.disks = disks
7562
    self.delay_iallocator = delay_iallocator
7563
    self.early_release = early_release
7564

    
7565
    # Runtime data
7566
    self.instance = None
7567
    self.new_node = None
7568
    self.target_node = None
7569
    self.other_node = None
7570
    self.remote_node_info = None
7571
    self.node_secondary_ip = None
7572

    
7573
  @staticmethod
7574
  def CheckArguments(mode, remote_node, iallocator):
7575
    """Helper function for users of this class.
7576

7577
    """
7578
    # check for valid parameter combination
7579
    if mode == constants.REPLACE_DISK_CHG:
7580
      if remote_node is None and iallocator is None:
7581
        raise errors.OpPrereqError("When changing the secondary either an"
7582
                                   " iallocator script must be used or the"
7583
                                   " new node given", errors.ECODE_INVAL)
7584

    
7585
      if remote_node is not None and iallocator is not None:
7586
        raise errors.OpPrereqError("Give either the iallocator or the new"
7587
                                   " secondary, not both", errors.ECODE_INVAL)
7588

    
7589
    elif remote_node is not None or iallocator is not None:
7590
      # Not replacing the secondary
7591
      raise errors.OpPrereqError("The iallocator and new node options can"
7592
                                 " only be used when changing the"
7593
                                 " secondary node", errors.ECODE_INVAL)
7594

    
7595
  @staticmethod
7596
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7597
    """Compute a new secondary node using an IAllocator.
7598

7599
    """
7600
    ial = IAllocator(lu.cfg, lu.rpc,
7601
                     mode=constants.IALLOCATOR_MODE_RELOC,
7602
                     name=instance_name,
7603
                     relocate_from=relocate_from)
7604

    
7605
    ial.Run(iallocator_name)
7606

    
7607
    if not ial.success:
7608
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7609
                                 " %s" % (iallocator_name, ial.info),
7610
                                 errors.ECODE_NORES)
7611

    
7612
    if len(ial.result) != ial.required_nodes:
7613
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7614
                                 " of nodes (%s), required %s" %
7615
                                 (iallocator_name,
7616
                                  len(ial.result), ial.required_nodes),
7617
                                 errors.ECODE_FAULT)
7618

    
7619
    remote_node_name = ial.result[0]
7620

    
7621
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7622
               instance_name, remote_node_name)
7623

    
7624
    return remote_node_name
7625

    
7626
  def _FindFaultyDisks(self, node_name):
7627
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7628
                                    node_name, True)
7629

    
7630
  def CheckPrereq(self):
7631
    """Check prerequisites.
7632

7633
    This checks that the instance is in the cluster.
7634

7635
    """
7636
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7637
    assert instance is not None, \
7638
      "Cannot retrieve locked instance %s" % self.instance_name
7639

    
7640
    if instance.disk_template != constants.DT_DRBD8:
7641
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7642
                                 " instances", errors.ECODE_INVAL)
7643

    
7644
    if len(instance.secondary_nodes) != 1:
7645
      raise errors.OpPrereqError("The instance has a strange layout,"
7646
                                 " expected one secondary but found %d" %
7647
                                 len(instance.secondary_nodes),
7648
                                 errors.ECODE_FAULT)
7649

    
7650
    if not self.delay_iallocator:
7651
      self._CheckPrereq2()
7652

    
7653
  def _CheckPrereq2(self):
7654
    """Check prerequisites, second part.
7655

7656
    This function should always be part of CheckPrereq. It was separated out
7657
    and is now called from Exec because, during node evacuation, the iallocator
7658
    would otherwise be called with an unmodified cluster model, not taking the
7659
    planned changes into account.
7660

7661
    """
7662
    instance = self.instance
7663
    secondary_node = instance.secondary_nodes[0]
7664

    
7665
    if self.iallocator_name is None:
7666
      remote_node = self.remote_node
7667
    else:
7668
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7669
                                       instance.name, instance.secondary_nodes)
7670

    
7671
    if remote_node is not None:
7672
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7673
      assert self.remote_node_info is not None, \
7674
        "Cannot retrieve locked node %s" % remote_node
7675
    else:
7676
      self.remote_node_info = None
7677

    
7678
    if remote_node == self.instance.primary_node:
7679
      raise errors.OpPrereqError("The specified node is the primary node of"
7680
                                 " the instance.", errors.ECODE_INVAL)
7681

    
7682
    if remote_node == secondary_node:
7683
      raise errors.OpPrereqError("The specified node is already the"
7684
                                 " secondary node of the instance.",
7685
                                 errors.ECODE_INVAL)
7686

    
7687
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7688
                                    constants.REPLACE_DISK_CHG):
7689
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7690
                                 errors.ECODE_INVAL)
7691

    
7692
    if self.mode == constants.REPLACE_DISK_AUTO:
7693
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7694
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7695

    
7696
      if faulty_primary and faulty_secondary:
7697
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7698
                                   " one node and can not be repaired"
7699
                                   " automatically" % self.instance_name,
7700
                                   errors.ECODE_STATE)
7701

    
7702
      if faulty_primary:
7703
        self.disks = faulty_primary
7704
        self.target_node = instance.primary_node
7705
        self.other_node = secondary_node
7706
        check_nodes = [self.target_node, self.other_node]
7707
      elif faulty_secondary:
7708
        self.disks = faulty_secondary
7709
        self.target_node = secondary_node
7710
        self.other_node = instance.primary_node
7711
        check_nodes = [self.target_node, self.other_node]
7712
      else:
7713
        self.disks = []
7714
        check_nodes = []
7715

    
7716
    else:
7717
      # Non-automatic modes
7718
      if self.mode == constants.REPLACE_DISK_PRI:
7719
        self.target_node = instance.primary_node
7720
        self.other_node = secondary_node
7721
        check_nodes = [self.target_node, self.other_node]
7722

    
7723
      elif self.mode == constants.REPLACE_DISK_SEC:
7724
        self.target_node = secondary_node
7725
        self.other_node = instance.primary_node
7726
        check_nodes = [self.target_node, self.other_node]
7727

    
7728
      elif self.mode == constants.REPLACE_DISK_CHG:
7729
        self.new_node = remote_node
7730
        self.other_node = instance.primary_node
7731
        self.target_node = secondary_node
7732
        check_nodes = [self.new_node, self.other_node]
7733

    
7734
        _CheckNodeNotDrained(self.lu, remote_node)
7735

    
7736
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7737
        assert old_node_info is not None
7738
        if old_node_info.offline and not self.early_release:
7739
          # doesn't make sense to delay the release
7740
          self.early_release = True
7741
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7742
                          " early-release mode", secondary_node)
7743

    
7744
      else:
7745
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7746
                                     self.mode)
7747

    
7748
      # If not specified all disks should be replaced
7749
      if not self.disks:
7750
        self.disks = range(len(self.instance.disks))
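        # (i.e. replace every disk of the instance)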
7751

    
7752
    for node in check_nodes:
7753
      _CheckNodeOnline(self.lu, node)
7754

    
7755
    # Check whether disks are valid
7756
    for disk_idx in self.disks:
7757
      instance.FindDisk(disk_idx)
7758

    
7759
    # Get secondary node IP addresses
7760
    node_2nd_ip = {}
7761

    
7762
    for node_name in [self.target_node, self.other_node, self.new_node]:
7763
      if node_name is not None:
7764
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7765

    
7766
    self.node_secondary_ip = node_2nd_ip
7767

    
7768
  def Exec(self, feedback_fn):
7769
    """Execute disk replacement.
7770

7771
    This dispatches the disk replacement to the appropriate handler.
7772

7773
    """
7774
    if self.delay_iallocator:
7775
      self._CheckPrereq2()
7776

    
7777
    if not self.disks:
7778
      feedback_fn("No disks need replacement")
7779
      return
7780

    
7781
    feedback_fn("Replacing disk(s) %s for %s" %
7782
                (utils.CommaJoin(self.disks), self.instance.name))
7783

    
7784
    activate_disks = (not self.instance.admin_up)
7785

    
7786
    # Activate the instance disks if we're replacing them on a down instance
7787
    if activate_disks:
7788
      _StartInstanceDisks(self.lu, self.instance, True)
7789

    
7790
    try:
7791
      # Should we replace the secondary node?
7792
      if self.new_node is not None:
7793
        fn = self._ExecDrbd8Secondary
7794
      else:
7795
        fn = self._ExecDrbd8DiskOnly
7796

    
7797
      return fn(feedback_fn)
7798

    
7799
    finally:
7800
      # Deactivate the instance disks if we're replacing them on a
7801
      # down instance
7802
      if activate_disks:
7803
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7804

    
7805
  def _CheckVolumeGroup(self, nodes):
7806
    self.lu.LogInfo("Checking volume groups")
7807

    
7808
    vgname = self.cfg.GetVGName()
7809

    
7810
    # Make sure volume group exists on all involved nodes
7811
    results = self.rpc.call_vg_list(nodes)
7812
    if not results:
7813
      raise errors.OpExecError("Can't list volume groups on the nodes")
7814

    
7815
    for node in nodes:
7816
      res = results[node]
7817
      res.Raise("Error checking node %s" % node)
7818
      if vgname not in res.payload:
7819
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7820
                                 (vgname, node))
7821

    
7822
  def _CheckDisksExistence(self, nodes):
7823
    # Check disk existence
7824
    for idx, dev in enumerate(self.instance.disks):
7825
      if idx not in self.disks:
7826
        continue
7827

    
7828
      for node in nodes:
7829
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7830
        self.cfg.SetDiskID(dev, node)
7831

    
7832
        result = self.rpc.call_blockdev_find(node, dev)
7833

    
7834
        msg = result.fail_msg
7835
        if msg or not result.payload:
7836
          if not msg:
7837
            msg = "disk not found"
7838
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7839
                                   (idx, node, msg))
7840

    
7841
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7842
    for idx, dev in enumerate(self.instance.disks):
7843
      if idx not in self.disks:
7844
        continue
7845

    
7846
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7847
                      (idx, node_name))
7848

    
7849
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7850
                                   ldisk=ldisk):
7851
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7852
                                 " replace disks for instance %s" %
7853
                                 (node_name, self.instance.name))
7854

    
7855
  def _CreateNewStorage(self, node_name):
7856
    vgname = self.cfg.GetVGName()
7857
    iv_names = {}
7858

    
7859
    for idx, dev in enumerate(self.instance.disks):
7860
      if idx not in self.disks:
7861
        continue
7862

    
7863
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7864

    
7865
      self.cfg.SetDiskID(dev, node_name)
7866

    
7867
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7868
      names = _GenerateUniqueNames(self.lu, lv_names)
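      # two unique LV names are generated per replaced disk: one for the data
      # volume and one for the DRBD metadata volume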
7869

    
7870
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7871
                             logical_id=(vgname, names[0]))
7872
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7873
                             logical_id=(vgname, names[1]))
7874

    
7875
      new_lvs = [lv_data, lv_meta]
7876
      old_lvs = dev.children
7877
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7878

    
7879
      # we pass force_create=True to force the LVM creation
7880
      for new_lv in new_lvs:
7881
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7882
                        _GetInstanceInfoText(self.instance), False)
7883

    
7884
    return iv_names
7885

    
7886
  def _CheckDevices(self, node_name, iv_names):
7887
    for name, (dev, _, _) in iv_names.iteritems():
7888
      self.cfg.SetDiskID(dev, node_name)
7889

    
7890
      result = self.rpc.call_blockdev_find(node_name, dev)
7891

    
7892
      msg = result.fail_msg
7893
      if msg or not result.payload:
7894
        if not msg:
7895
          msg = "disk not found"
7896
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7897
                                 (name, msg))
7898

    
7899
      if result.payload.is_degraded:
7900
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7901

    
7902
  def _RemoveOldStorage(self, node_name, iv_names):
7903
    for name, (_, old_lvs, _) in iv_names.iteritems():
7904
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7905

    
7906
      for lv in old_lvs:
7907
        self.cfg.SetDiskID(lv, node_name)
7908

    
7909
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7910
        if msg:
7911
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7912
                             hint="remove unused LVs manually")
7913

    
7914
  def _ReleaseNodeLock(self, node_name):
7915
    """Releases the lock for a given node."""
7916
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7917

    
7918
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7919
    """Replace a disk on the primary or secondary for DRBD 8.
7920

7921
    The algorithm for replace is quite complicated:
7922

7923
      1. for each disk to be replaced:
7924

7925
        1. create new LVs on the target node with unique names
7926
        1. detach old LVs from the drbd device
7927
        1. rename old LVs to name_replaced.<time_t>
7928
        1. rename new LVs to old LVs
7929
        1. attach the new LVs (with the old names now) to the drbd device
7930

7931
      1. wait for sync across all devices
7932

7933
      1. for each modified disk:
7934

7935
        1. remove old LVs (which have the name name_replaced.<time_t>)
7936

7937
    Failures are not very well handled.
7938

7939
    """
7940
    steps_total = 6
7941

    
7942
    # Step: check device activation
7943
    self.lu.LogStep(1, steps_total, "Check device existence")
7944
    self._CheckDisksExistence([self.other_node, self.target_node])
7945
    self._CheckVolumeGroup([self.target_node, self.other_node])
7946

    
7947
    # Step: check other node consistency
7948
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7949
    self._CheckDisksConsistency(self.other_node,
7950
                                self.other_node == self.instance.primary_node,
7951
                                False)
7952

    
7953
    # Step: create new storage
7954
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7955
    iv_names = self._CreateNewStorage(self.target_node)
7956

    
7957
    # Step: for each lv, detach+rename*2+attach
7958
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7959
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7960
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7961

    
7962
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7963
                                                     old_lvs)
7964
      result.Raise("Can't detach drbd from local storage on node"
7965
                   " %s for device %s" % (self.target_node, dev.iv_name))
7966
      #dev.children = []
7967
      #cfg.Update(instance)
7968

    
7969
      # ok, we created the new LVs, so now we know we have the needed
7970
      # storage; as such, we proceed on the target node to rename
7971
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7972
      # using the assumption that logical_id == physical_id (which in
7973
      # turn is the unique_id on that node)
7974

    
7975
      # FIXME(iustin): use a better name for the replaced LVs
7976
      temp_suffix = int(time.time())
7977
      ren_fn = lambda d, suff: (d.physical_id[0],
7978
                                d.physical_id[1] + "_replaced-%s" % suff)
7979

    
7980
      # Build the rename list based on what LVs exist on the node
7981
      rename_old_to_new = []
7982
      for to_ren in old_lvs:
7983
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7984
        if not result.fail_msg and result.payload:
7985
          # device exists
7986
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7987

    
7988
      self.lu.LogInfo("Renaming the old LVs on the target node")
7989
      result = self.rpc.call_blockdev_rename(self.target_node,
7990
                                             rename_old_to_new)
7991
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7992

    
7993
      # Now we rename the new LVs to the old LVs
7994
      self.lu.LogInfo("Renaming the new LVs on the target node")
7995
      rename_new_to_old = [(new, old.physical_id)
7996
                           for old, new in zip(old_lvs, new_lvs)]
7997
      result = self.rpc.call_blockdev_rename(self.target_node,
7998
                                             rename_new_to_old)
7999
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8000

    
8001
      for old, new in zip(old_lvs, new_lvs):
8002
        new.logical_id = old.logical_id
8003
        self.cfg.SetDiskID(new, self.target_node)
8004

    
8005
      for disk in old_lvs:
8006
        disk.logical_id = ren_fn(disk, temp_suffix)
8007
        self.cfg.SetDiskID(disk, self.target_node)
8008

    
8009
      # Now that the new lvs have the old name, we can add them to the device
8010
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8011
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8012
                                                  new_lvs)
8013
      msg = result.fail_msg
8014
      if msg:
8015
        for new_lv in new_lvs:
8016
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8017
                                               new_lv).fail_msg
8018
          if msg2:
8019
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8020
                               hint=("cleanup manually the unused logical"
8021
                                     "volumes"))
8022
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8023

    
8024
      dev.children = new_lvs
8025

    
8026
      self.cfg.Update(self.instance, feedback_fn)
8027

    
8028
    cstep = 5
8029
    if self.early_release:
8030
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8031
      cstep += 1
8032
      self._RemoveOldStorage(self.target_node, iv_names)
8033
      # WARNING: we release both node locks here, do not do other RPCs
8034
      # than WaitForSync to the primary node
8035
      self._ReleaseNodeLock([self.target_node, self.other_node])
8036

    
8037
    # Wait for sync
8038
    # This can fail as the old devices are degraded and _WaitForSync
8039
    # does a combined result over all disks, so we don't check its return value
8040
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8041
    cstep += 1
8042
    _WaitForSync(self.lu, self.instance)
8043

    
8044
    # Check all devices manually
8045
    self._CheckDevices(self.instance.primary_node, iv_names)
8046

    
8047
    # Step: remove old storage
8048
    if not self.early_release:
8049
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8050
      cstep += 1
8051
      self._RemoveOldStorage(self.target_node, iv_names)
8052

    
8053
  def _ExecDrbd8Secondary(self, feedback_fn):
8054
    """Replace the secondary node for DRBD 8.
8055

8056
    The algorithm for replace is quite complicated:
8057
      - for all disks of the instance:
8058
        - create new LVs on the new node with same names
8059
        - shutdown the drbd device on the old secondary
8060
        - disconnect the drbd network on the primary
8061
        - create the drbd device on the new secondary
8062
        - network attach the drbd on the primary, using an artifice:
8063
          the drbd code for Attach() will connect to the network if it
8064
          finds a device which is connected to the good local disks but
8065
          not network enabled
8066
      - wait for sync across all devices
8067
      - remove all disks from the old secondary
8068

8069
    Failures are not very well handled.
8070

8071
    """
8072
    steps_total = 6
8073

    
8074
    # Step: check device activation
8075
    self.lu.LogStep(1, steps_total, "Check device existence")
8076
    self._CheckDisksExistence([self.instance.primary_node])
8077
    self._CheckVolumeGroup([self.instance.primary_node])
8078

    
8079
    # Step: check other node consistency
8080
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8081
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8082

    
8083
    # Step: create new storage
8084
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8085
    for idx, dev in enumerate(self.instance.disks):
8086
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8087
                      (self.new_node, idx))
8088
      # we pass force_create=True to force LVM creation
8089
      for new_lv in dev.children:
8090
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8091
                        _GetInstanceInfoText(self.instance), False)
8092

    
8093
    # Step 4: drbd minors and drbd setup changes
8094
    # after this, we must manually remove the drbd minors on both the
8095
    # error and the success paths
8096
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8097
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8098
                                         for dev in self.instance.disks],
8099
                                        self.instance.name)
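    # one new DRBD minor is requested on the new node for every disk of the
    # instance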
8100
    logging.debug("Allocated minors %r", minors)
8101

    
8102
    iv_names = {}
8103
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8104
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8105
                      (self.new_node, idx))
8106
      # create new devices on new_node; note that we create two IDs:
8107
      # one without port, so the drbd will be activated without
8108
      # networking information on the new node at this stage, and one
8109
      # with network, for the latter activation in step 4
8110
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8111
      if self.instance.primary_node == o_node1:
8112
        p_minor = o_minor1
8113
      else:
8114
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8115
        p_minor = o_minor2
8116

    
8117
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8118
                      p_minor, new_minor, o_secret)
8119
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8120
                    p_minor, new_minor, o_secret)
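      # (logical_id layout for a DRBD8 disk: node_a, node_b, port, minor_a,
      #  minor_b, shared secret, as unpacked above)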
8121

    
8122
      iv_names[idx] = (dev, dev.children, new_net_id)
8123
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8124
                    new_net_id)
8125
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8126
                              logical_id=new_alone_id,
8127
                              children=dev.children,
8128
                              size=dev.size)
8129
      try:
8130
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8131
                              _GetInstanceInfoText(self.instance), False)
8132
      except errors.GenericError:
8133
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8134
        raise
8135

    
8136
    # We have new devices, shutdown the drbd on the old secondary
8137
    for idx, dev in enumerate(self.instance.disks):
8138
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8139
      self.cfg.SetDiskID(dev, self.target_node)
8140
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8141
      if msg:
8142
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8143
                           "node: %s" % (idx, msg),
8144
                           hint=("Please cleanup this device manually as"
8145
                                 " soon as possible"))
8146

    
8147
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8148
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8149
                                               self.node_secondary_ip,
8150
                                               self.instance.disks)\
8151
                                              [self.instance.primary_node]
8152

    
8153
    msg = result.fail_msg
8154
    if msg:
8155
      # detaches didn't succeed (unlikely)
8156
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8157
      raise errors.OpExecError("Can't detach the disks from the network on"
8158
                               " old node: %s" % (msg,))
8159

    
8160
    # if we managed to detach at least one, we update all the disks of
8161
    # the instance to point to the new secondary
8162
    self.lu.LogInfo("Updating instance configuration")
8163
    for dev, _, new_logical_id in iv_names.itervalues():
8164
      dev.logical_id = new_logical_id
8165
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8166

    
8167
    self.cfg.Update(self.instance, feedback_fn)
8168

    
8169
    # and now perform the drbd attach
8170
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8171
                    " (standalone => connected)")
8172
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8173
                                            self.new_node],
8174
                                           self.node_secondary_ip,
8175
                                           self.instance.disks,
8176
                                           self.instance.name,
8177
                                           False)
8178
    for to_node, to_result in result.items():
8179
      msg = to_result.fail_msg
8180
      if msg:
8181
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8182
                           to_node, msg,
8183
                           hint=("please do a gnt-instance info to see the"
8184
                                 " status of disks"))
8185
    cstep = 5
8186
    if self.early_release:
8187
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8188
      cstep += 1
8189
      self._RemoveOldStorage(self.target_node, iv_names)
8190
      # WARNING: we release all node locks here, do not do other RPCs
8191
      # than WaitForSync to the primary node
8192
      self._ReleaseNodeLock([self.instance.primary_node,
8193
                             self.target_node,
8194
                             self.new_node])
8195

    
8196
    # Wait for sync
8197
    # This can fail as the old devices are degraded and _WaitForSync
8198
    # does a combined result over all disks, so we don't check its return value
8199
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8200
    cstep += 1
8201
    _WaitForSync(self.lu, self.instance)
8202

    
8203
    # Check all devices manually
8204
    self._CheckDevices(self.instance.primary_node, iv_names)
8205

    
8206
    # Step: remove old storage
8207
    if not self.early_release:
8208
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8209
      self._RemoveOldStorage(self.target_node, iv_names)
8210

    
8211

    
8212
class LURepairNodeStorage(NoHooksLU):
8213
  """Repairs the volume group on a node.
8214

8215
  """
8216
  _OP_PARAMS = [
8217
    _PNodeName,
8218
    ("storage_type", _NoDefault, _CheckStorageType),
8219
    ("name", _NoDefault, _TNonEmptyString),
8220
    ("ignore_consistency", False, _TBool),
8221
    ]
8222
  REQ_BGL = False
8223

    
8224
  def CheckArguments(self):
8225
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8226

    
8227
    storage_type = self.op.storage_type
8228

    
8229
    if (constants.SO_FIX_CONSISTENCY not in
8230
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8231
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8232
                                 " repaired" % storage_type,
8233
                                 errors.ECODE_INVAL)
8234

    
8235
  def ExpandNames(self):
8236
    self.needed_locks = {
8237
      locking.LEVEL_NODE: [self.op.node_name],
8238
      }
8239

    
8240
  def _CheckFaultyDisks(self, instance, node_name):
8241
    """Ensure faulty disks abort the opcode or at least warn."""
8242
    try:
8243
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8244
                                  node_name, True):
8245
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8246
                                   " node '%s'" % (instance.name, node_name),
8247
                                   errors.ECODE_STATE)
8248
    except errors.OpPrereqError, err:
8249
      if self.op.ignore_consistency:
8250
        self.proc.LogWarning(str(err.args[0]))
8251
      else:
8252
        raise
8253

    
8254
  def CheckPrereq(self):
8255
    """Check prerequisites.
8256

8257
    """
8258
    # Check whether any instance on this node has faulty disks
8259
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8260
      if not inst.admin_up:
8261
        continue
8262
      check_nodes = set(inst.all_nodes)
8263
      check_nodes.discard(self.op.node_name)
8264
      for inst_node_name in check_nodes:
8265
        self._CheckFaultyDisks(inst, inst_node_name)
8266

    
8267
  def Exec(self, feedback_fn):
8268
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8269
                (self.op.name, self.op.node_name))
8270

    
8271
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8272
    result = self.rpc.call_storage_execute(self.op.node_name,
8273
                                           self.op.storage_type, st_args,
8274
                                           self.op.name,
8275
                                           constants.SO_FIX_CONSISTENCY)
8276
    result.Raise("Failed to repair storage unit '%s' on %s" %
8277
                 (self.op.name, self.op.node_name))
8278

    
8279

    
8280
class LUNodeEvacuationStrategy(NoHooksLU):
8281
  """Computes the node evacuation strategy.
8282

8283
  """
8284
  _OP_PARAMS = [
8285
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8286
    ("remote_node", None, _TMaybeString),
8287
    ("iallocator", None, _TMaybeString),
8288
    ]
8289
  REQ_BGL = False
8290

    
8291
  def CheckArguments(self):
8292
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8293

    
8294
  def ExpandNames(self):
8295
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8296
    self.needed_locks = locks = {}
8297
    if self.op.remote_node is None:
8298
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8299
    else:
8300
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8301
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8302

    
8303
  def Exec(self, feedback_fn):
8304
    if self.op.remote_node is not None:
8305
      instances = []
8306
      for node in self.op.nodes:
8307
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8308
      result = []
8309
      for i in instances:
8310
        if i.primary_node == self.op.remote_node:
8311
          raise errors.OpPrereqError("Node %s is the primary node of"
8312
                                     " instance %s, cannot use it as"
8313
                                     " secondary" %
8314
                                     (self.op.remote_node, i.name),
8315
                                     errors.ECODE_INVAL)
8316
        result.append([i.name, self.op.remote_node])
8317
    else:
8318
      ial = IAllocator(self.cfg, self.rpc,
8319
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8320
                       evac_nodes=self.op.nodes)
8321
      ial.Run(self.op.iallocator, validate=True)
8322
      if not ial.success:
8323
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8324
                                 errors.ECODE_NORES)
8325
      result = ial.result
8326
    return result


class LUGrowDisk(LogicalUnit):
8330
  """Grow a disk of an instance.
8331

8332
  """
8333
  HPATH = "disk-grow"
8334
  HTYPE = constants.HTYPE_INSTANCE
8335
  _OP_PARAMS = [
8336
    _PInstanceName,
8337
    ("disk", _NoDefault, _TInt),
8338
    ("amount", _NoDefault, _TInt),
8339
    ("wait_for_sync", True, _TBool),
8340
    ]
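  # Illustrative example (parameter values are assumptions, shown only to
  # clarify the opcode parameters above): disk=0, amount=1024 requests growing
  # the instance's first disk by 1024, in the same mebibyte unit used for disk
  # sizes elsewhere in this module; wait_for_sync=False returns before the
  # resync has finished.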
8341
  REQ_BGL = False
8342

    
8343
  def ExpandNames(self):
8344
    self._ExpandAndLockInstance()
8345
    self.needed_locks[locking.LEVEL_NODE] = []
8346
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8347

    
8348
  def DeclareLocks(self, level):
8349
    if level == locking.LEVEL_NODE:
8350
      self._LockInstancesNodes()
8351

    
8352
  def BuildHooksEnv(self):
8353
    """Build hooks env.
8354

8355
    This runs on the master, the primary and all the secondaries.
8356

8357
    """
8358
    env = {
8359
      "DISK": self.op.disk,
8360
      "AMOUNT": self.op.amount,
8361
      }
8362
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8363
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8364
    return env, nl, nl
8365

    
8366
  def CheckPrereq(self):
8367
    """Check prerequisites.
8368

8369
    This checks that the instance is in the cluster.
8370

8371
    """
8372
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8373
    assert instance is not None, \
8374
      "Cannot retrieve locked instance %s" % self.op.instance_name
8375
    nodenames = list(instance.all_nodes)
8376
    for node in nodenames:
8377
      _CheckNodeOnline(self, node)
8378

    
8379
    self.instance = instance
8380

    
8381
    if instance.disk_template not in constants.DTS_GROWABLE:
8382
      raise errors.OpPrereqError("Instance's disk layout does not support"
8383
                                 " growing.", errors.ECODE_INVAL)
8384

    
8385
    self.disk = instance.FindDisk(self.op.disk)
8386

    
8387
    if instance.disk_template != constants.DT_FILE:
8388
      # TODO: check the free disk space for file-based disks, when that
      # feature is supported
8390
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8391

    
8392
  def Exec(self, feedback_fn):
8393
    """Execute disk grow.
8394

8395
    """
8396
    instance = self.instance
8397
    disk = self.disk
8398

    
8399
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8400
    if not disks_ok:
8401
      raise errors.OpExecError("Cannot activate block device to grow")
8402

    
8403
    for node in instance.all_nodes:
8404
      self.cfg.SetDiskID(disk, node)
8405
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8406
      result.Raise("Grow request failed to node %s" % node)
8407

    
8408
      # TODO: Rewrite code to work properly
8409
      # DRBD goes into sync mode for a short amount of time after executing the
8410
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8411
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8412
      # time is a work-around.
8413
      time.sleep(5)
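      # A possible alternative to the fixed sleep (sketch only, not used here
      # and not validated against the DRBD bug described above) would be to
      # poll the device until it leaves sync mode, e.g.:
      #   for _ in range(10):
      #     status = self.rpc.call_blockdev_find(node, disk)
      #     if (not status.fail_msg and status.payload and
      #         not status.payload.is_degraded):
      #       break
      #     time.sleep(1)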
8414

    
8415
    disk.RecordGrow(self.op.amount)
8416
    self.cfg.Update(instance, feedback_fn)
8417
    if self.op.wait_for_sync:
8418
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8419
      if disk_abort:
8420
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8421
                             " status.\nPlease check the instance.")
8422
      if not instance.admin_up:
8423
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8424
    elif not instance.admin_up:
8425
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8426
                           " not supposed to be running because no wait for"
8427
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
8431
  """Query runtime instance data.
8432

8433
  """
8434
  _OP_PARAMS = [
8435
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8436
    ("static", False, _TBool),
8437
    ]
8438
  REQ_BGL = False
8439

    
8440
  def ExpandNames(self):
8441
    self.needed_locks = {}
8442
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8443

    
8444
    if self.op.instances:
8445
      self.wanted_names = []
8446
      for name in self.op.instances:
8447
        full_name = _ExpandInstanceName(self.cfg, name)
8448
        self.wanted_names.append(full_name)
8449
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8450
    else:
8451
      self.wanted_names = None
8452
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8453

    
8454
    self.needed_locks[locking.LEVEL_NODE] = []
8455
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8456

    
8457
  def DeclareLocks(self, level):
8458
    if level == locking.LEVEL_NODE:
8459
      self._LockInstancesNodes()
8460

    
8461
  def CheckPrereq(self):
8462
    """Check prerequisites.
8463

8464
    This only checks the optional instance list against the existing names.
8465

8466
    """
8467
    if self.wanted_names is None:
8468
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8469

    
8470
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8471
                             in self.wanted_names]
8472

    
8473
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8474
    """Returns the status of a block device
8475

8476
    """
8477
    if self.op.static or not node:
8478
      return None
8479

    
8480
    self.cfg.SetDiskID(dev, node)
8481

    
8482
    result = self.rpc.call_blockdev_find(node, dev)
8483
    if result.offline:
8484
      return None
8485

    
8486
    result.Raise("Can't compute disk status for %s" % instance_name)
8487

    
8488
    status = result.payload
8489
    if status is None:
8490
      return None
8491

    
8492
    return (status.dev_path, status.major, status.minor,
8493
            status.sync_percent, status.estimated_time,
8494
            status.is_degraded, status.ldisk_status)
8495

    
8496
  def _ComputeDiskStatus(self, instance, snode, dev):
8497
    """Compute block device status.
8498

8499
    """
8500
    if dev.dev_type in constants.LDS_DRBD:
8501
      # we change the snode then (otherwise we use the one passed in)
8502
      if dev.logical_id[0] == instance.primary_node:
8503
        snode = dev.logical_id[1]
8504
      else:
8505
        snode = dev.logical_id[0]
8506

    
8507
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8508
                                              instance.name, dev)
8509
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8510

    
8511
    if dev.children:
8512
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8513
                      for child in dev.children]
8514
    else:
8515
      dev_children = []
8516

    
8517
    data = {
8518
      "iv_name": dev.iv_name,
8519
      "dev_type": dev.dev_type,
8520
      "logical_id": dev.logical_id,
8521
      "physical_id": dev.physical_id,
8522
      "pstatus": dev_pstatus,
8523
      "sstatus": dev_sstatus,
8524
      "children": dev_children,
8525
      "mode": dev.mode,
8526
      "size": dev.size,
8527
      }
8528

    
8529
    return data
8530

    
8531
  def Exec(self, feedback_fn):
8532
    """Gather and return data"""
8533
    result = {}
8534

    
8535
    cluster = self.cfg.GetClusterInfo()
8536

    
8537
    for instance in self.wanted_instances:
8538
      if not self.op.static:
8539
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8540
                                                  instance.name,
8541
                                                  instance.hypervisor)
8542
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8543
        remote_info = remote_info.payload
8544
        if remote_info and "state" in remote_info:
8545
          remote_state = "up"
8546
        else:
8547
          remote_state = "down"
8548
      else:
8549
        remote_state = None
8550
      if instance.admin_up:
8551
        config_state = "up"
8552
      else:
8553
        config_state = "down"
8554

    
8555
      disks = [self._ComputeDiskStatus(instance, None, device)
8556
               for device in instance.disks]
8557

    
8558
      idict = {
8559
        "name": instance.name,
8560
        "config_state": config_state,
8561
        "run_state": remote_state,
8562
        "pnode": instance.primary_node,
8563
        "snodes": instance.secondary_nodes,
8564
        "os": instance.os,
8565
        # this happens to be the same format used for hooks
8566
        "nics": _NICListToTuple(self, instance.nics),
8567
        "disk_template": instance.disk_template,
8568
        "disks": disks,
8569
        "hypervisor": instance.hypervisor,
8570
        "network_port": instance.network_port,
8571
        "hv_instance": instance.hvparams,
8572
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8573
        "be_instance": instance.beparams,
8574
        "be_actual": cluster.FillBE(instance),
8575
        "os_instance": instance.osparams,
8576
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8577
        "serial_no": instance.serial_no,
8578
        "mtime": instance.mtime,
8579
        "ctime": instance.ctime,
8580
        "uuid": instance.uuid,
8581
        }
8582

    
8583
      result[instance.name] = idict
8584

    
8585
    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
8590

8591
  """
8592
  HPATH = "instance-modify"
8593
  HTYPE = constants.HTYPE_INSTANCE
8594
  _OP_PARAMS = [
8595
    _PInstanceName,
8596
    ("nics", _EmptyList, _TList),
8597
    ("disks", _EmptyList, _TList),
8598
    ("beparams", _EmptyDict, _TDict),
8599
    ("hvparams", _EmptyDict, _TDict),
8600
    ("disk_template", None, _TMaybeString),
8601
    ("remote_node", None, _TMaybeString),
8602
    ("os_name", None, _TMaybeString),
8603
    ("force_variant", False, _TBool),
8604
    ("osparams", None, _TOr(_TDict, _TNone)),
8605
    _PForce,
8606
    ]
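  # Illustrative examples of the (operation, settings) format used by the
  # "disks" and "nics" parameters (the concrete values are assumptions, given
  # only for illustration):
  #   disks=[(constants.DDM_ADD, {"size": 1024})]
  #   nics=[(0, {"link": "br0"})]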
8607
  REQ_BGL = False
8608

    
8609
  def CheckArguments(self):
8610
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8611
            self.op.hvparams or self.op.beparams or self.op.os_name):
8612
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8613

    
8614
    if self.op.hvparams:
8615
      _CheckGlobalHvParams(self.op.hvparams)
8616

    
8617
    # Disk validation
8618
    disk_addremove = 0
8619
    for disk_op, disk_dict in self.op.disks:
8620
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8621
      if disk_op == constants.DDM_REMOVE:
8622
        disk_addremove += 1
8623
        continue
8624
      elif disk_op == constants.DDM_ADD:
8625
        disk_addremove += 1
8626
      else:
8627
        if not isinstance(disk_op, int):
8628
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8629
        if not isinstance(disk_dict, dict):
8630
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8631
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8632

    
8633
      if disk_op == constants.DDM_ADD:
8634
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8635
        if mode not in constants.DISK_ACCESS_SET:
8636
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8637
                                     errors.ECODE_INVAL)
8638
        size = disk_dict.get('size', None)
8639
        if size is None:
8640
          raise errors.OpPrereqError("Required disk parameter size missing",
8641
                                     errors.ECODE_INVAL)
8642
        try:
8643
          size = int(size)
8644
        except (TypeError, ValueError), err:
8645
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8646
                                     str(err), errors.ECODE_INVAL)
8647
        disk_dict['size'] = size
8648
      else:
8649
        # modification of disk
8650
        if 'size' in disk_dict:
8651
          raise errors.OpPrereqError("Disk size change not possible, use"
8652
                                     " grow-disk", errors.ECODE_INVAL)
8653

    
8654
    if disk_addremove > 1:
8655
      raise errors.OpPrereqError("Only one disk add or remove operation"
8656
                                 " supported at a time", errors.ECODE_INVAL)
8657

    
8658
    if self.op.disks and self.op.disk_template is not None:
8659
      raise errors.OpPrereqError("Disk template conversion and other disk"
8660
                                 " changes not supported at the same time",
8661
                                 errors.ECODE_INVAL)
8662

    
8663
    if self.op.disk_template:
8664
      _CheckDiskTemplate(self.op.disk_template)
8665
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8666
          self.op.remote_node is None):
8667
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8668
                                   " one requires specifying a secondary node",
8669
                                   errors.ECODE_INVAL)
8670

    
8671
    # NIC validation
8672
    nic_addremove = 0
8673
    for nic_op, nic_dict in self.op.nics:
8674
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8675
      if nic_op == constants.DDM_REMOVE:
8676
        nic_addremove += 1
8677
        continue
8678
      elif nic_op == constants.DDM_ADD:
8679
        nic_addremove += 1
8680
      else:
8681
        if not isinstance(nic_op, int):
8682
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8683
        if not isinstance(nic_dict, dict):
8684
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8685
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8686

    
8687
      # nic_dict should be a dict
8688
      nic_ip = nic_dict.get('ip', None)
8689
      if nic_ip is not None:
8690
        if nic_ip.lower() == constants.VALUE_NONE:
8691
          nic_dict['ip'] = None
8692
        else:
8693
          if not netutils.IsValidIP4(nic_ip):
8694
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8695
                                       errors.ECODE_INVAL)
8696

    
8697
      nic_bridge = nic_dict.get('bridge', None)
8698
      nic_link = nic_dict.get('link', None)
8699
      if nic_bridge and nic_link:
8700
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8701
                                   " at the same time", errors.ECODE_INVAL)
8702
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8703
        nic_dict['bridge'] = None
8704
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8705
        nic_dict['link'] = None
8706

    
8707
      if nic_op == constants.DDM_ADD:
8708
        nic_mac = nic_dict.get('mac', None)
8709
        if nic_mac is None:
8710
          nic_dict['mac'] = constants.VALUE_AUTO
8711

    
8712
      if 'mac' in nic_dict:
8713
        nic_mac = nic_dict['mac']
8714
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8715
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8716

    
8717
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8718
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8719
                                     " modifying an existing nic",
8720
                                     errors.ECODE_INVAL)
8721

    
8722
    if nic_addremove > 1:
8723
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8724
                                 " supported at a time", errors.ECODE_INVAL)
8725

    
8726
  def ExpandNames(self):
8727
    self._ExpandAndLockInstance()
8728
    self.needed_locks[locking.LEVEL_NODE] = []
8729
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8730

    
8731
  def DeclareLocks(self, level):
8732
    if level == locking.LEVEL_NODE:
8733
      self._LockInstancesNodes()
8734
      if self.op.disk_template and self.op.remote_node:
8735
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8736
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8737

    
8738
  def BuildHooksEnv(self):
8739
    """Build hooks env.
8740

8741
    This runs on the master, primary and secondaries.
8742

8743
    """
8744
    args = dict()
8745
    if constants.BE_MEMORY in self.be_new:
8746
      args['memory'] = self.be_new[constants.BE_MEMORY]
8747
    if constants.BE_VCPUS in self.be_new:
8748
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8749
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8750
    # information at all.
8751
    if self.op.nics:
8752
      args['nics'] = []
8753
      nic_override = dict(self.op.nics)
8754
      for idx, nic in enumerate(self.instance.nics):
8755
        if idx in nic_override:
8756
          this_nic_override = nic_override[idx]
8757
        else:
8758
          this_nic_override = {}
8759
        if 'ip' in this_nic_override:
8760
          ip = this_nic_override['ip']
8761
        else:
8762
          ip = nic.ip
8763
        if 'mac' in this_nic_override:
8764
          mac = this_nic_override['mac']
8765
        else:
8766
          mac = nic.mac
8767
        if idx in self.nic_pnew:
8768
          nicparams = self.nic_pnew[idx]
8769
        else:
8770
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8771
        mode = nicparams[constants.NIC_MODE]
8772
        link = nicparams[constants.NIC_LINK]
8773
        args['nics'].append((ip, mac, mode, link))
8774
      if constants.DDM_ADD in nic_override:
8775
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8776
        mac = nic_override[constants.DDM_ADD]['mac']
8777
        nicparams = self.nic_pnew[constants.DDM_ADD]
8778
        mode = nicparams[constants.NIC_MODE]
8779
        link = nicparams[constants.NIC_LINK]
8780
        args['nics'].append((ip, mac, mode, link))
8781
      elif constants.DDM_REMOVE in nic_override:
8782
        del args['nics'][-1]
8783

    
8784
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8785
    if self.op.disk_template:
8786
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8787
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8788
    return env, nl, nl
8789

    
8790
  def CheckPrereq(self):
8791
    """Check prerequisites.
8792

8793
    This only checks the instance list against the existing names.
8794

8795
    """
8796
    # checking the new params on the primary/secondary nodes
8797

    
8798
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8799
    cluster = self.cluster = self.cfg.GetClusterInfo()
8800
    assert self.instance is not None, \
8801
      "Cannot retrieve locked instance %s" % self.op.instance_name
8802
    pnode = instance.primary_node
8803
    nodelist = list(instance.all_nodes)
8804

    
8805
    # OS change
8806
    if self.op.os_name and not self.op.force:
8807
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8808
                      self.op.force_variant)
8809
      instance_os = self.op.os_name
8810
    else:
8811
      instance_os = instance.os
8812

    
8813
    if self.op.disk_template:
8814
      if instance.disk_template == self.op.disk_template:
8815
        raise errors.OpPrereqError("Instance already has disk template %s" %
8816
                                   instance.disk_template, errors.ECODE_INVAL)
8817

    
8818
      if (instance.disk_template,
8819
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8820
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8821
                                   " %s to %s" % (instance.disk_template,
8822
                                                  self.op.disk_template),
8823
                                   errors.ECODE_INVAL)
8824
      _CheckInstanceDown(self, instance, "cannot change disk template")
8825
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8826
        if self.op.remote_node == pnode:
8827
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8828
                                     " as the primary node of the instance" %
8829
                                     self.op.remote_node, errors.ECODE_STATE)
8830
        _CheckNodeOnline(self, self.op.remote_node)
8831
        _CheckNodeNotDrained(self, self.op.remote_node)
8832
        disks = [{"size": d.size} for d in instance.disks]
8833
        required = _ComputeDiskSize(self.op.disk_template, disks)
8834
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8835

    
8836
    # hvparams processing
8837
    if self.op.hvparams:
8838
      hv_type = instance.hypervisor
8839
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8840
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8841
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8842

    
8843
      # local check
8844
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8845
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8846
      self.hv_new = hv_new # the new actual values
8847
      self.hv_inst = i_hvdict # the new dict (without defaults)
8848
    else:
8849
      self.hv_new = self.hv_inst = {}
8850

    
8851
    # beparams processing
8852
    if self.op.beparams:
8853
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8854
                                   use_none=True)
8855
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8856
      be_new = cluster.SimpleFillBE(i_bedict)
8857
      self.be_new = be_new # the new actual values
8858
      self.be_inst = i_bedict # the new dict (without defaults)
8859
    else:
8860
      self.be_new = self.be_inst = {}
8861

    
8862
    # osparams processing
8863
    if self.op.osparams:
8864
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8865
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8866
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8867
      self.os_inst = i_osdict # the new dict (without defaults)
8868
    else:
8869
      self.os_new = self.os_inst = {}
8870

    
8871
    self.warn = []
8872

    
8873
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8874
      mem_check_list = [pnode]
8875
      if be_new[constants.BE_AUTO_BALANCE]:
8876
        # either we changed auto_balance to yes or it was from before
8877
        mem_check_list.extend(instance.secondary_nodes)
8878
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8879
                                                  instance.hypervisor)
8880
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8881
                                         instance.hypervisor)
8882
      pninfo = nodeinfo[pnode]
8883
      msg = pninfo.fail_msg
8884
      if msg:
8885
        # Assume the primary node is unreachable and go ahead
8886
        self.warn.append("Can't get info from primary node %s: %s" %
8887
                         (pnode,  msg))
8888
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8889
        self.warn.append("Node data from primary node %s doesn't contain"
8890
                         " free memory information" % pnode)
8891
      elif instance_info.fail_msg:
8892
        self.warn.append("Can't get instance runtime information: %s" %
8893
                        instance_info.fail_msg)
8894
      else:
8895
        if instance_info.payload:
8896
          current_mem = int(instance_info.payload['memory'])
8897
        else:
8898
          # Assume instance not running
8899
          # (there is a slight race condition here, but it's not very probable,
8900
          # and we have no other way to check)
8901
          current_mem = 0
8902
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8903
                    pninfo.payload['memory_free'])
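        # Worked example with illustrative numbers: raising BE_MEMORY to 4096
        # while the instance currently uses 1024 and the node reports 2048
        # free gives miss_mem = 4096 - 1024 - 2048 = 1024, so the request is
        # rejected below.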
8904
        if miss_mem > 0:
8905
          raise errors.OpPrereqError("This change will prevent the instance"
8906
                                     " from starting, due to %d MB of memory"
8907
                                     " missing on its primary node" % miss_mem,
8908
                                     errors.ECODE_NORES)
8909

    
8910
      if be_new[constants.BE_AUTO_BALANCE]:
8911
        for node, nres in nodeinfo.items():
8912
          if node not in instance.secondary_nodes:
8913
            continue
8914
          msg = nres.fail_msg
8915
          if msg:
8916
            self.warn.append("Can't get info from secondary node %s: %s" %
8917
                             (node, msg))
8918
          elif not isinstance(nres.payload.get('memory_free', None), int):
8919
            self.warn.append("Secondary node %s didn't return free"
8920
                             " memory information" % node)
8921
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8922
            self.warn.append("Not enough memory to failover instance to"
8923
                             " secondary node %s" % node)
8924

    
8925
    # NIC processing
8926
    self.nic_pnew = {}
8927
    self.nic_pinst = {}
8928
    for nic_op, nic_dict in self.op.nics:
8929
      if nic_op == constants.DDM_REMOVE:
8930
        if not instance.nics:
8931
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8932
                                     errors.ECODE_INVAL)
8933
        continue
8934
      if nic_op != constants.DDM_ADD:
8935
        # an existing nic
8936
        if not instance.nics:
8937
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8938
                                     " no NICs" % nic_op,
8939
                                     errors.ECODE_INVAL)
8940
        if nic_op < 0 or nic_op >= len(instance.nics):
8941
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8942
                                     " are 0 to %d" %
8943
                                     (nic_op, len(instance.nics) - 1),
8944
                                     errors.ECODE_INVAL)
8945
        old_nic_params = instance.nics[nic_op].nicparams
8946
        old_nic_ip = instance.nics[nic_op].ip
8947
      else:
8948
        old_nic_params = {}
8949
        old_nic_ip = None
8950

    
8951
      update_params_dict = dict([(key, nic_dict[key])
8952
                                 for key in constants.NICS_PARAMETERS
8953
                                 if key in nic_dict])
8954

    
8955
      if 'bridge' in nic_dict:
8956
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8957

    
8958
      new_nic_params = _GetUpdatedParams(old_nic_params,
8959
                                         update_params_dict)
8960
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8961
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8962
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8963
      self.nic_pinst[nic_op] = new_nic_params
8964
      self.nic_pnew[nic_op] = new_filled_nic_params
8965
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8966

    
8967
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8968
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8969
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8970
        if msg:
8971
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8972
          if self.op.force:
8973
            self.warn.append(msg)
8974
          else:
8975
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8976
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8977
        if 'ip' in nic_dict:
8978
          nic_ip = nic_dict['ip']
8979
        else:
8980
          nic_ip = old_nic_ip
8981
        if nic_ip is None:
8982
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8983
                                     ' on a routed nic', errors.ECODE_INVAL)
8984
      if 'mac' in nic_dict:
8985
        nic_mac = nic_dict['mac']
8986
        if nic_mac is None:
8987
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8988
                                     errors.ECODE_INVAL)
8989
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8990
          # otherwise generate the mac
8991
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8992
        else:
8993
          # or validate/reserve the current one
8994
          try:
8995
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8996
          except errors.ReservationError:
8997
            raise errors.OpPrereqError("MAC address %s already in use"
8998
                                       " in cluster" % nic_mac,
8999
                                       errors.ECODE_NOTUNIQUE)
9000

    
9001
    # DISK processing
9002
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9003
      raise errors.OpPrereqError("Disk operations not supported for"
9004
                                 " diskless instances",
9005
                                 errors.ECODE_INVAL)
9006
    for disk_op, _ in self.op.disks:
9007
      if disk_op == constants.DDM_REMOVE:
9008
        if len(instance.disks) == 1:
9009
          raise errors.OpPrereqError("Cannot remove the last disk of"
9010
                                     " an instance", errors.ECODE_INVAL)
9011
        _CheckInstanceDown(self, instance, "cannot remove disks")
9012

    
9013
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
9015
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9016
                                   " add more" % constants.MAX_DISKS,
9017
                                   errors.ECODE_STATE)
9018
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9019
        # an existing disk
9020
        if disk_op < 0 or disk_op >= len(instance.disks):
9021
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9022
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
9024
                                     errors.ECODE_INVAL)
9025

    
9026
    return
9027

    
9028
  def _ConvertPlainToDrbd(self, feedback_fn):
9029
    """Converts an instance from plain to drbd.
9030

9031
    """
9032
    feedback_fn("Converting template to drbd")
9033
    instance = self.instance
9034
    pnode = instance.primary_node
9035
    snode = self.op.remote_node
9036

    
9037
    # create a fake disk info for _GenerateDiskTemplate
9038
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9039
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9040
                                      instance.name, pnode, [snode],
9041
                                      disk_info, None, None, 0)
9042
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
9044
    # first, create the missing data and meta devices
9045
    for disk in new_disks:
9046
      # unfortunately this is... not too nice
9047
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9048
                            info, True)
9049
      for child in disk.children:
9050
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9051
    # at this stage, all new LVs have been created, we can rename the
9052
    # old ones
9053
    feedback_fn("Renaming original volumes...")
9054
    rename_list = [(o, n.children[0].logical_id)
9055
                   for (o, n) in zip(instance.disks, new_disks)]
9056
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9057
    result.Raise("Failed to rename original LVs")
9058

    
9059
    feedback_fn("Initializing DRBD devices...")
9060
    # all child devices are in place, we can now create the DRBD devices
9061
    for disk in new_disks:
9062
      for node in [pnode, snode]:
9063
        f_create = node == pnode
9064
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9065

    
9066
    # at this point, the instance has been modified
9067
    instance.disk_template = constants.DT_DRBD8
9068
    instance.disks = new_disks
9069
    self.cfg.Update(instance, feedback_fn)
9070

    
9071
    # disks are created, waiting for sync
9072
    disk_abort = not _WaitForSync(self, instance)
9073
    if disk_abort:
9074
      raise errors.OpExecError("There are some degraded disks for"
9075
                               " this instance, please cleanup manually")
9076

    
9077
  def _ConvertDrbdToPlain(self, feedback_fn):
9078
    """Converts an instance from drbd to plain.
9079

9080
    """
9081
    instance = self.instance
9082
    assert len(instance.secondary_nodes) == 1
9083
    pnode = instance.primary_node
9084
    snode = instance.secondary_nodes[0]
9085
    feedback_fn("Converting template to plain")
9086

    
9087
    old_disks = instance.disks
9088
    new_disks = [d.children[0] for d in old_disks]
9089

    
9090
    # copy over size and mode
9091
    for parent, child in zip(old_disks, new_disks):
9092
      child.size = parent.size
9093
      child.mode = parent.mode
9094

    
9095
    # update instance structure
9096
    instance.disks = new_disks
9097
    instance.disk_template = constants.DT_PLAIN
9098
    self.cfg.Update(instance, feedback_fn)
9099

    
9100
    feedback_fn("Removing volumes on the secondary node...")
9101
    for disk in old_disks:
9102
      self.cfg.SetDiskID(disk, snode)
9103
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9104
      if msg:
9105
        self.LogWarning("Could not remove block device %s on node %s,"
9106
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9107

    
9108
    feedback_fn("Removing unneeded volumes on the primary node...")
9109
    for idx, disk in enumerate(old_disks):
9110
      meta = disk.children[1]
9111
      self.cfg.SetDiskID(meta, pnode)
9112
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9113
      if msg:
9114
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9115
                        " continuing anyway: %s", idx, pnode, msg)
9116

    
9117

    
9118
  def Exec(self, feedback_fn):
9119
    """Modifies an instance.
9120

9121
    All parameters take effect only at the next restart of the instance.
9122

9123
    """
9124
    # Process here the warnings from CheckPrereq, as we don't have a
9125
    # feedback_fn there.
9126
    for warn in self.warn:
9127
      feedback_fn("WARNING: %s" % warn)
9128

    
9129
    result = []
9130
    instance = self.instance
9131
    # disk changes
9132
    for disk_op, disk_dict in self.op.disks:
9133
      if disk_op == constants.DDM_REMOVE:
9134
        # remove the last disk
9135
        device = instance.disks.pop()
9136
        device_idx = len(instance.disks)
9137
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9138
          self.cfg.SetDiskID(disk, node)
9139
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9140
          if msg:
9141
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9142
                            " continuing anyway", device_idx, node, msg)
9143
        result.append(("disk/%d" % device_idx, "remove"))
9144
      elif disk_op == constants.DDM_ADD:
9145
        # add a new disk
9146
        if instance.disk_template == constants.DT_FILE:
9147
          file_driver, file_path = instance.disks[0].logical_id
9148
          file_path = os.path.dirname(file_path)
9149
        else:
9150
          file_driver = file_path = None
9151
        disk_idx_base = len(instance.disks)
9152
        new_disk = _GenerateDiskTemplate(self,
9153
                                         instance.disk_template,
9154
                                         instance.name, instance.primary_node,
9155
                                         instance.secondary_nodes,
9156
                                         [disk_dict],
9157
                                         file_path,
9158
                                         file_driver,
9159
                                         disk_idx_base)[0]
9160
        instance.disks.append(new_disk)
9161
        info = _GetInstanceInfoText(instance)
9162

    
9163
        logging.info("Creating volume %s for instance %s",
9164
                     new_disk.iv_name, instance.name)
9165
        # Note: this needs to be kept in sync with _CreateDisks
9166
        #HARDCODE
9167
        for node in instance.all_nodes:
9168
          f_create = node == instance.primary_node
9169
          try:
9170
            _CreateBlockDev(self, node, instance, new_disk,
9171
                            f_create, info, f_create)
9172
          except errors.OpExecError, err:
9173
            self.LogWarning("Failed to create volume %s (%s) on"
9174
                            " node %s: %s",
9175
                            new_disk.iv_name, new_disk, node, err)
9176
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9177
                       (new_disk.size, new_disk.mode)))
9178
      else:
9179
        # change a given disk
9180
        instance.disks[disk_op].mode = disk_dict['mode']
9181
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9182

    
9183
    if self.op.disk_template:
9184
      r_shut = _ShutdownInstanceDisks(self, instance)
9185
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9187
                                 " proceed with disk template conversion")
9188
      mode = (instance.disk_template, self.op.disk_template)
9189
      try:
9190
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9191
      except:
9192
        self.cfg.ReleaseDRBDMinors(instance.name)
9193
        raise
9194
      result.append(("disk_template", self.op.disk_template))
9195

    
9196
    # NIC changes
9197
    for nic_op, nic_dict in self.op.nics:
9198
      if nic_op == constants.DDM_REMOVE:
9199
        # remove the last nic
9200
        del instance.nics[-1]
9201
        result.append(("nic.%d" % len(instance.nics), "remove"))
9202
      elif nic_op == constants.DDM_ADD:
9203
        # mac and bridge should be set, by now
9204
        mac = nic_dict['mac']
9205
        ip = nic_dict.get('ip', None)
9206
        nicparams = self.nic_pinst[constants.DDM_ADD]
9207
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9208
        instance.nics.append(new_nic)
9209
        result.append(("nic.%d" % (len(instance.nics) - 1),
9210
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9211
                       (new_nic.mac, new_nic.ip,
9212
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9213
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9214
                       )))
9215
      else:
9216
        for key in 'mac', 'ip':
9217
          if key in nic_dict:
9218
            setattr(instance.nics[nic_op], key, nic_dict[key])
9219
        if nic_op in self.nic_pinst:
9220
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9221
        for key, val in nic_dict.iteritems():
9222
          result.append(("nic.%s/%d" % (key, nic_op), val))
9223

    
9224
    # hvparams changes
9225
    if self.op.hvparams:
9226
      instance.hvparams = self.hv_inst
9227
      for key, val in self.op.hvparams.iteritems():
9228
        result.append(("hv/%s" % key, val))
9229

    
9230
    # beparams changes
9231
    if self.op.beparams:
9232
      instance.beparams = self.be_inst
9233
      for key, val in self.op.beparams.iteritems():
9234
        result.append(("be/%s" % key, val))
9235

    
9236
    # OS change
9237
    if self.op.os_name:
9238
      instance.os = self.op.os_name
9239

    
9240
    # osparams changes
9241
    if self.op.osparams:
9242
      instance.osparams = self.os_inst
9243
      for key, val in self.op.osparams.iteritems():
9244
        result.append(("os/%s" % key, val))
9245

    
9246
    self.cfg.Update(instance, feedback_fn)
9247

    
9248
    return result
9249

    
9250
  _DISK_CONVERSIONS = {
9251
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9252
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9253
    }


class LUQueryExports(NoHooksLU):
9257
  """Query the exports list
9258

9259
  """
9260
  _OP_PARAMS = [
9261
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9262
    ("use_locking", False, _TBool),
9263
    ]
9264
  REQ_BGL = False
9265

    
9266
  def ExpandNames(self):
9267
    self.needed_locks = {}
9268
    self.share_locks[locking.LEVEL_NODE] = 1
9269
    if not self.op.nodes:
9270
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9271
    else:
9272
      self.needed_locks[locking.LEVEL_NODE] = \
9273
        _GetWantedNodes(self, self.op.nodes)
9274

    
9275
  def Exec(self, feedback_fn):
9276
    """Compute the list of all the exported system images.
9277

9278
    @rtype: dict
9279
    @return: a dictionary with the structure node->(export-list)
9280
        where export-list is a list of the instances exported on
9281
        that node.
9282

9283
    """
9284
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9285
    rpcresult = self.rpc.call_export_list(self.nodes)
9286
    result = {}
9287
    for node in rpcresult:
9288
      if rpcresult[node].fail_msg:
9289
        result[node] = False
9290
      else:
9291
        result[node] = rpcresult[node].payload
9292

    
9293
    return result


class LUPrepareExport(NoHooksLU):
9297
  """Prepares an instance for an export and returns useful information.
9298

9299
  """
9300
  _OP_PARAMS = [
9301
    _PInstanceName,
9302
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9303
    ]
9304
  REQ_BGL = False
9305

    
9306
  def ExpandNames(self):
9307
    self._ExpandAndLockInstance()
9308

    
9309
  def CheckPrereq(self):
9310
    """Check prerequisites.
9311

9312
    """
9313
    instance_name = self.op.instance_name
9314

    
9315
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9316
    assert self.instance is not None, \
9317
          "Cannot retrieve locked instance %s" % self.op.instance_name
9318
    _CheckNodeOnline(self, self.instance.primary_node)
9319

    
9320
    self._cds = _GetClusterDomainSecret()
9321

    
9322
  def Exec(self, feedback_fn):
9323
    """Prepares an instance for an export.
9324

9325
    """
9326
    instance = self.instance
9327

    
9328
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9329
      salt = utils.GenerateSecret(8)
9330

    
9331
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9332
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9333
                                              constants.RIE_CERT_VALIDITY)
9334
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9335

    
9336
      (name, cert_pem) = result.payload
9337

    
9338
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9339
                                             cert_pem)
9340

    
9341
      return {
9342
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9343
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9344
                          salt),
9345
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9346
        }
9347

    
9348
    return None


class LUExportInstance(LogicalUnit):
9352
  """Export an instance to an image in the cluster.
9353

9354
  """
9355
  HPATH = "instance-export"
9356
  HTYPE = constants.HTYPE_INSTANCE
9357
  _OP_PARAMS = [
9358
    _PInstanceName,
9359
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9360
    ("shutdown", True, _TBool),
9361
    _PShutdownTimeout,
9362
    ("remove_instance", False, _TBool),
9363
    ("ignore_remove_failures", False, _TBool),
9364
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9365
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9366
    ("destination_x509_ca", None, _TMaybeString),
9367
    ]
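  # Note on "target_node": for EXPORT_MODE_LOCAL it is a single node name,
  # while for EXPORT_MODE_REMOTE it is a list with one opaque entry per
  # instance disk, which CheckPrereq verifies via
  # masterd.instance.CheckRemoteExportDiskInfo.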
9368
  REQ_BGL = False
9369

    
9370
  def CheckArguments(self):
9371
    """Check the arguments.
9372

9373
    """
9374
    self.x509_key_name = self.op.x509_key_name
9375
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9376

    
9377
    if self.op.remove_instance and not self.op.shutdown:
9378
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9379
                                 " down before")
9380

    
9381
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9382
      if not self.x509_key_name:
9383
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9384
                                   errors.ECODE_INVAL)
9385

    
9386
      if not self.dest_x509_ca_pem:
9387
        raise errors.OpPrereqError("Missing destination X509 CA",
9388
                                   errors.ECODE_INVAL)
9389

    
9390
  def ExpandNames(self):
9391
    self._ExpandAndLockInstance()
9392

    
9393
    # Lock all nodes for local exports
9394
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9395
      # FIXME: lock only instance primary and destination node
9396
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
9398
      # the previous export might be, and in this LU we search for it and
9399
      # remove it from its current node. In the future we could fix this by:
9400
      #  - making a tasklet to search (share-lock all), then create the
9401
      #    new one, then one to remove, after
9402
      #  - removing the removal operation altogether
9403
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9404

    
9405
  def DeclareLocks(self, level):
9406
    """Last minute lock declaration."""
9407
    # All nodes are locked anyway, so nothing to do here.
9408

    
9409
  def BuildHooksEnv(self):
9410
    """Build hooks env.
9411

9412
    This will run on the master, primary node and target node.
9413

9414
    """
9415
    env = {
9416
      "EXPORT_MODE": self.op.mode,
9417
      "EXPORT_NODE": self.op.target_node,
9418
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9419
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9420
      # TODO: Generic function for boolean env variables
9421
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9422
      }
9423

    
9424
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9425

    
9426
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9427

    
9428
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9429
      nl.append(self.op.target_node)
9430

    
9431
    return env, nl, nl
9432

    
9433
  def CheckPrereq(self):
9434
    """Check prerequisites.
9435

9436
    This checks that the instance and node names are valid.
9437

9438
    """
9439
    instance_name = self.op.instance_name
9440

    
9441
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9442
    assert self.instance is not None, \
9443
          "Cannot retrieve locked instance %s" % self.op.instance_name
9444
    _CheckNodeOnline(self, self.instance.primary_node)
9445

    
9446
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9447
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9448
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9449
      assert self.dst_node is not None
9450

    
9451
      _CheckNodeOnline(self, self.dst_node.name)
9452
      _CheckNodeNotDrained(self, self.dst_node.name)
9453

    
9454
      self._cds = None
9455
      self.dest_disk_info = None
9456
      self.dest_x509_ca = None
9457

    
9458
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9459
      self.dst_node = None
9460

    
9461
      if len(self.op.target_node) != len(self.instance.disks):
9462
        raise errors.OpPrereqError(("Received destination information for %s"
9463
                                    " disks, but instance %s has %s disks") %
9464
                                   (len(self.op.target_node), instance_name,
9465
                                    len(self.instance.disks)),
9466
                                   errors.ECODE_INVAL)
9467

    
9468
      cds = _GetClusterDomainSecret()
9469

    
9470
      # Check X509 key name
9471
      try:
9472
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9473
      except (TypeError, ValueError), err:
9474
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9475

    
9476
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9477
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9478
                                   errors.ECODE_INVAL)
9479

    
9480
      # Load and verify CA
9481
      try:
9482
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9483
      except OpenSSL.crypto.Error, err:
9484
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9485
                                   (err, ), errors.ECODE_INVAL)
9486

    
9487
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9488
      if errcode is not None:
9489
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9490
                                   (msg, ), errors.ECODE_INVAL)
9491

    
9492
      self.dest_x509_ca = cert
9493

    
9494
      # Verify target information
9495
      disk_info = []
9496
      for idx, disk_data in enumerate(self.op.target_node):
9497
        try:
9498
          (host, port, magic) = \
9499
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9500
        except errors.GenericError, err:
9501
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9502
                                     (idx, err), errors.ECODE_INVAL)
9503

    
9504
        disk_info.append((host, port, magic))
9505

    
9506
      assert len(disk_info) == len(self.op.target_node)
9507
      self.dest_disk_info = disk_info
9508

    
9509
    else:
9510
      raise errors.ProgrammerError("Unhandled export mode %r" %
9511
                                   self.op.mode)
9512

    
9513
    # instance disk type verification
9514
    # TODO: Implement export support for file-based disks
9515
    for disk in self.instance.disks:
9516
      if disk.dev_type == constants.LD_FILE:
9517
        raise errors.OpPrereqError("Export not supported for instances with"
9518
                                   " file-based disks", errors.ECODE_INVAL)
9519

    
9520
  def _CleanupExports(self, feedback_fn):
9521
    """Removes exports of current instance from all other nodes.
9522

9523
    If an instance in a cluster with nodes A..D was exported to node C, its
9524
    exports will be removed from the nodes A, B and D.
9525

9526
    """
9527
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9528

    
9529
    nodelist = self.cfg.GetNodeList()
9530
    nodelist.remove(self.dst_node.name)
9531

    
9532
    # on one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
9535
    iname = self.instance.name
9536
    if nodelist:
9537
      feedback_fn("Removing old exports for instance %s" % iname)
9538
      exportlist = self.rpc.call_export_list(nodelist)
9539
      for node in exportlist:
9540
        if exportlist[node].fail_msg:
9541
          continue
9542
        if iname in exportlist[node].payload:
9543
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9544
          if msg:
9545
            self.LogWarning("Could not remove older export for instance %s"
9546
                            " on node %s: %s", iname, node, msg)
9547

    
9548
  def Exec(self, feedback_fn):
9549
    """Export an instance to an image in the cluster.
9550

9551
    """
9552
    assert self.op.mode in constants.EXPORT_MODES
9553

    
9554
    instance = self.instance
9555
    src_node = instance.primary_node
9556

    
9557
    if self.op.shutdown:
9558
      # shutdown the instance, but not the disks
9559
      feedback_fn("Shutting down instance %s" % instance.name)
9560
      result = self.rpc.call_instance_shutdown(src_node, instance,
9561
                                               self.op.shutdown_timeout)
9562
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9563
      result.Raise("Could not shutdown instance %s on"
9564
                   " node %s" % (instance.name, src_node))
9565

    
9566
    # set the disks ID correctly since call_instance_start needs the
9567
    # correct drbd minor to create the symlinks
9568
    for disk in instance.disks:
9569
      self.cfg.SetDiskID(disk, src_node)
9570

    
9571
    activate_disks = (not instance.admin_up)
9572

    
9573
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
9575
      feedback_fn("Activating disks for %s" % instance.name)
9576
      _StartInstanceDisks(self, instance, None)
9577

    
9578
    try:
9579
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9580
                                                     instance)
9581

    
9582
      helper.CreateSnapshots()
9583
      try:
9584
        if (self.op.shutdown and instance.admin_up and
9585
            not self.op.remove_instance):
9586
          assert not activate_disks
9587
          feedback_fn("Starting instance %s" % instance.name)
9588
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9589
          msg = result.fail_msg
9590
          if msg:
9591
            feedback_fn("Failed to start instance: %s" % msg)
9592
            _ShutdownInstanceDisks(self, instance)
9593
            raise errors.OpExecError("Could not start instance: %s" % msg)
9594

    
9595
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9596
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9597
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9598
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9599
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9600

    
9601
          (key_name, _, _) = self.x509_key_name
9602

    
9603
          dest_ca_pem = \
9604
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9605
                                            self.dest_x509_ca)
9606

    
9607
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9608
                                                     key_name, dest_ca_pem,
9609
                                                     timeouts)
9610
      finally:
9611
        helper.Cleanup()
9612

    
9613
      # Check for backwards compatibility
9614
      assert len(dresults) == len(instance.disks)
9615
      assert compat.all(isinstance(i, bool) for i in dresults), \
9616
             "Not all results are boolean: %r" % dresults
9617

    
9618
    finally:
9619
      if activate_disks:
9620
        feedback_fn("Deactivating disks for %s" % instance.name)
9621
        _ShutdownInstanceDisks(self, instance)
9622

    
9623
    if not (compat.all(dresults) and fin_resu):
9624
      failures = []
9625
      if not fin_resu:
9626
        failures.append("export finalization")
9627
      if not compat.all(dresults):
9628
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9629
                               if not dsk)
9630
        failures.append("disk export: disk(s) %s" % fdsk)
9631

    
9632
      raise errors.OpExecError("Export failed, errors in %s" %
9633
                               utils.CommaJoin(failures))
9634

    
9635
    # At this point, the export was successful, we can cleanup/finish
9636

    
9637
    # Remove instance if requested
9638
    if self.op.remove_instance:
9639
      feedback_fn("Removing instance %s" % instance.name)
9640
      _RemoveInstance(self, feedback_fn, instance,
9641
                      self.op.ignore_remove_failures)
9642

    
9643
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9644
      self._CleanupExports(feedback_fn)
9645

    
9646
    return fin_resu, dresults
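    # Return value sketch (illustrative only): given the asserts above, a
    # successful local export of a two-disk instance would return something
    # like (True, [True, True]), i.e. the finalization status plus one
    # boolean per exported disk.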


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
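    # Result sketch (hypothetical names): searching for "rack" could return
    # [("/nodes/node1.example.com", "rack:r1"),
    #  ("/instances/web1.example.com", "rack-aware")].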


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
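    # Client-side sketch (the test client is not part of this module): it
    # receives the socket path via the ELOG_JQUEUE_TEST message emitted by
    # _SendNotification below and is expected to do roughly:
    #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    #   sock.connect(sockname)   # within _CLIENT_CONNECT_TIMEOUT
    #   ...                      # run its checks
    #   sock.send("x")           # any byte confirms, within _CLIENT_CONFIRM_TIMEOUT
    #   sock.close()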

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
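    # Illustrative shape of the serialized document (values are examples only;
    # the node and instance dictionaries are the ones built above):
    #   {"version": constants.IALLOCATOR_VERSION,
    #    "cluster_name": "cluster.example.com",
    #    "cluster_tags": [...],
    #    "enabled_hypervisors": [...],
    #    "nodes": {"node1.example.com": {...}, ...},
    #    "instances": {"inst1.example.com": {...}, ...},
    #    "request": {"type": self.mode, "name": ..., ...}}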

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
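    # A well-formed reply therefore looks roughly like (illustrative values):
    #   {"success": true, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}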


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result