
root / lib / cmdlib.py @ c950e9f2


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have way too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56

    
57
import ganeti.masterd.instance # pylint: disable-msg=W0611
58

    
59

    
60
# Modifiable default values; need to define these here before the
61
# actual LUs
62

    
63
def _EmptyList():
64
  """Returns an empty list.
65

66
  """
67
  return []
68

    
69

    
70
def _EmptyDict():
71
  """Returns an empty dict.
72

73
  """
74
  return {}
75

    
76

    
77
#: The without-default default value
78
_NoDefault = object()
79

    
80

    
81
#: The no-type (value too complex to check in the type system)
82
_NoType = object()
83

    
84

    
85
# Some basic types
86
def _TNotNone(val):
87
  """Checks if the given value is not None.
88

89
  """
90
  return val is not None
91

    
92

    
93
def _TNone(val):
94
  """Checks if the given value is None.
95

96
  """
97
  return val is None
98

    
99

    
100
def _TBool(val):
101
  """Checks if the given value is a boolean.
102

103
  """
104
  return isinstance(val, bool)
105

    
106

    
107
def _TInt(val):
108
  """Checks if the given value is an integer.
109

110
  """
111
  return isinstance(val, int)
112

    
113

    
114
def _TFloat(val):
115
  """Checks if the given value is a float.
116

117
  """
118
  return isinstance(val, float)
119

    
120

    
121
def _TString(val):
122
  """Checks if the given value is a string.
123

124
  """
125
  return isinstance(val, basestring)
126

    
127

    
128
def _TTrue(val):
129
  """Checks if a given value evaluates to a boolean True value.
130

131
  """
132
  return bool(val)
133

    
134

    
135
def _TElemOf(target_list):
136
  """Builds a function that checks if a given value is a member of a list.
137

138
  """
139
  return lambda val: val in target_list
140

    
141

    
142
# Container types
143
def _TList(val):
144
  """Checks if the given value is a list.
145

146
  """
147
  return isinstance(val, list)
148

    
149

    
150
def _TDict(val):
151
  """Checks if the given value is a dictionary.
152

153
  """
154
  return isinstance(val, dict)
155

    
156

    
157
# Combinator types
158
def _TAnd(*args):
159
  """Combine multiple functions using an AND operation.
160

161
  """
162
  def fn(val):
163
    return compat.all(t(val) for t in args)
164
  return fn
165

    
166

    
167
def _TOr(*args):
168
  """Combine multiple functions using an AND operation.
169

170
  """
171
  def fn(val):
172
    return compat.any(t(val) for t in args)
173
  return fn
174

    
175

    
176
# Type aliases
177

    
178
#: a non-empty string
179
_TNonEmptyString = _TAnd(_TString, _TTrue)
180

    
181

    
182
#: a maybe non-empty string
183
_TMaybeString = _TOr(_TNonEmptyString, _TNone)
184

    
185

    
186
#: a maybe boolean (bool or none)
187
_TMaybeBool = _TOr(_TBool, _TNone)
188

    
189

    
190
#: a positive integer
191
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
192

    
193
#: a strictly positive integer
194
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
195

    
196

    
197
def _TListOf(my_type):
198
  """Checks if a given value is a list with all elements of the same type.
199

200
  """
201
  return _TAnd(_TList,
202
               lambda lst: compat.all(my_type(v) for v in lst))
203

    
204

    
205
def _TDictOf(key_type, val_type):
206
  """Checks a dict type for the type of its key/values.
207

208
  """
209
  return _TAnd(_TDict,
210
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
211
                                and compat.all(val_type(v)
212
                                               for v in my_dict.values())))
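# Usage sketch (illustrative only, not part of the original module): the
# checks above are plain callables returning True/False, so they compose
# freely, e.g. for a dict mapping non-empty strings to non-negative ints:
#
#   >>> check = _TDictOf(_TNonEmptyString, _TPositiveInt)
#   >>> check({"sda": 1024, "sdb": 0})
#   True
#   >>> check({"": -1})
#   False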
213

    
214

    
215
# Common opcode attributes
216

    
217
#: output fields for a query operation
218
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
219

    
220

    
221
#: the shutdown timeout
222
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
223
                     _TPositiveInt)
224

    
225
#: the force parameter
226
_PForce = ("force", False, _TBool)
227

    
228
#: a required instance name (for single-instance LUs)
229
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
230

    
231

    
232
#: a required node name (for single-node LUs)
233
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
234

    
235
#: the migration type (live/non-live)
236
_PMigrationMode = ("mode", None, _TOr(_TNone,
237
                                      _TElemOf(constants.HT_MIGRATION_MODES)))
238

    
239
#: the obsolete 'live' mode (boolean)
240
_PMigrationLive = ("live", None, _TMaybeBool)
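# Illustrative sketch (an assumption, mirroring how the LU constructor
# below consumes these tuples): an LU lists the shared (name, default,
# check) definitions above together with op-specific ones in _OP_PARAMS;
# _NoDefault makes a parameter mandatory, any other default is filled in
# when the attribute is missing, and the third element validates the value:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("ignore_failures", False, _TBool),
#     ]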
241

    
242

    
243
# End types
244
class LogicalUnit(object):
245
  """Logical Unit base class.
246

247
  Subclasses must follow these rules:
248
    - implement ExpandNames
249
    - implement CheckPrereq (except when tasklets are used)
250
    - implement Exec (except when tasklets are used)
251
    - implement BuildHooksEnv
252
    - redefine HPATH and HTYPE
253
    - optionally redefine their run requirements:
254
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
255

256
  Note that all commands require root permissions.
257

258
  @ivar dry_run_result: the value (if any) that will be returned to the caller
259
      in dry-run mode (signalled by opcode dry_run parameter)
260
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
261
      they should get if not already defined, and types they must match
262

263
  """
264
  HPATH = None
265
  HTYPE = None
266
  _OP_PARAMS = []
267
  REQ_BGL = True
268

    
269
  def __init__(self, processor, op, context, rpc):
270
    """Constructor for LogicalUnit.
271

272
    This needs to be overridden in derived classes in order to check op
273
    validity.
274

275
    """
276
    self.proc = processor
277
    self.op = op
278
    self.cfg = context.cfg
279
    self.context = context
280
    self.rpc = rpc
281
    # Dicts used to declare locking needs to mcpu
282
    self.needed_locks = None
283
    self.acquired_locks = {}
284
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
285
    self.add_locks = {}
286
    self.remove_locks = {}
287
    # Used to force good behavior when calling helper functions
288
    self.recalculate_locks = {}
289
    self.__ssh = None
290
    # logging
291
    self.Log = processor.Log # pylint: disable-msg=C0103
292
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
293
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
294
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
295
    # support for dry-run
296
    self.dry_run_result = None
297
    # support for generic debug attribute
298
    if (not hasattr(self.op, "debug_level") or
299
        not isinstance(self.op.debug_level, int)):
300
      self.op.debug_level = 0
301

    
302
    # Tasklets
303
    self.tasklets = None
304

    
305
    # The new kind-of-type-system
306
    op_id = self.op.OP_ID
307
    for attr_name, aval, test in self._OP_PARAMS:
308
      if not hasattr(op, attr_name):
309
        if aval == _NoDefault:
310
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
311
                                     (op_id, attr_name), errors.ECODE_INVAL)
312
        else:
313
          if callable(aval):
314
            dval = aval()
315
          else:
316
            dval = aval
317
          setattr(self.op, attr_name, dval)
318
      attr_val = getattr(op, attr_name)
319
      if test == _NoType:
320
        # no tests here
321
        continue
322
      if not callable(test):
323
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
324
                                     " given type is not a proper type (%s)" %
325
                                     (op_id, attr_name, test))
326
      if not test(attr_val):
327
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
328
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
329
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
330
                                   (op_id, attr_name), errors.ECODE_INVAL)
331

    
332
    self.CheckArguments()
333

    
334
  def __GetSSH(self):
335
    """Returns the SshRunner object
336

337
    """
338
    if not self.__ssh:
339
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
340
    return self.__ssh
341

    
342
  ssh = property(fget=__GetSSH)
343

    
344
  def CheckArguments(self):
345
    """Check syntactic validity for the opcode arguments.
346

347
    This method is for doing a simple syntactic check and ensuring
348
    the validity of opcode parameters, without any cluster-related
349
    checks. While the same can be accomplished in ExpandNames and/or
350
    CheckPrereq, doing these separately is better because:
351

352
      - ExpandNames is left purely as a lock-related function
353
      - CheckPrereq is run after we have acquired locks (and possibly
354
        waited for them)
355

356
    The function is allowed to change the self.op attribute so that
357
    later methods need not worry about missing parameters.
358

359
    """
360
    pass
361

    
362
  def ExpandNames(self):
363
    """Expand names for this LU.
364

365
    This method is called before starting to execute the opcode, and it should
366
    update all the parameters of the opcode to their canonical form (e.g. a
367
    short node name must be fully expanded after this method has successfully
368
    completed). This way locking, hooks, logging, etc. can work correctly.
369

370
    LUs which implement this method must also populate the self.needed_locks
371
    member, as a dict with lock levels as keys, and a list of needed lock names
372
    as values. Rules:
373

374
      - use an empty dict if you don't need any lock
375
      - if you don't need any lock at a particular level omit that level
376
      - don't put anything for the BGL level
377
      - if you want all locks at a level use locking.ALL_SET as a value
378

379
    If you need to share locks (rather than acquire them exclusively) at one
380
    level you can modify self.share_locks, setting a true value (usually 1) for
381
    that level. By default locks are not shared.
382

383
    This function can also define a list of tasklets, which then will be
384
    executed in order instead of the usual LU-level CheckPrereq and Exec
385
    functions, if those are not defined by the LU.
386

387
    Examples::
388

389
      # Acquire all nodes and one instance
390
      self.needed_locks = {
391
        locking.LEVEL_NODE: locking.ALL_SET,
392
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
393
      }
394
      # Acquire just two nodes
395
      self.needed_locks = {
396
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
397
      }
398
      # Acquire no locks
399
      self.needed_locks = {} # No, you can't leave it to the default value None
400

401
    """
402
    # The implementation of this method is mandatory only if the new LU is
403
    # concurrent, so that old LUs don't need to be changed all at the same
404
    # time.
405
    if self.REQ_BGL:
406
      self.needed_locks = {} # Exclusive LUs don't need locks.
407
    else:
408
      raise NotImplementedError
409

    
410
  def DeclareLocks(self, level):
411
    """Declare LU locking needs for a level
412

413
    While most LUs can just declare their locking needs at ExpandNames time,
414
    sometimes there's the need to calculate some locks after having acquired
415
    the ones before. This function is called just before acquiring locks at a
416
    particular level, but after acquiring the ones at lower levels, and permits
417
    such calculations. It can be used to modify self.needed_locks, and by
418
    default it does nothing.
419

420
    This function is only called if you have something already set in
421
    self.needed_locks for the level.
422

423
    @param level: Locking level which is going to be locked
424
    @type level: member of ganeti.locking.LEVELS
425

426
    """
427

    
428
  def CheckPrereq(self):
429
    """Check prerequisites for this LU.
430

431
    This method should check that the prerequisites for the execution
432
    of this LU are fulfilled. It can do internode communication, but
433
    it should be idempotent - no cluster or system changes are
434
    allowed.
435

436
    The method should raise errors.OpPrereqError in case something is
437
    not fulfilled. Its return value is ignored.
438

439
    This method should also update all the parameters of the opcode to
440
    their canonical form if it hasn't been done by ExpandNames before.
441

442
    """
443
    if self.tasklets is not None:
444
      for (idx, tl) in enumerate(self.tasklets):
445
        logging.debug("Checking prerequisites for tasklet %s/%s",
446
                      idx + 1, len(self.tasklets))
447
        tl.CheckPrereq()
448
    else:
449
      pass
450

    
451
  def Exec(self, feedback_fn):
452
    """Execute the LU.
453

454
    This method should implement the actual work. It should raise
455
    errors.OpExecError for failures that are somewhat dealt with in
456
    code, or expected.
457

458
    """
459
    if self.tasklets is not None:
460
      for (idx, tl) in enumerate(self.tasklets):
461
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
462
        tl.Exec(feedback_fn)
463
    else:
464
      raise NotImplementedError
465

    
466
  def BuildHooksEnv(self):
467
    """Build hooks environment for this LU.
468

469
    This method should return a three-node tuple consisting of: a dict
470
    containing the environment that will be used for running the
471
    specific hook for this LU, a list of node names on which the hook
472
    should run before the execution, and a list of node names on which
473
    the hook should run after the execution.
474

475
    The keys of the dict must not be prefixed with 'GANETI_' as this will
476
    be handled in the hooks runner. Also note additional keys will be
477
    added by the hooks runner. If the LU doesn't define any
478
    environment, an empty dict (and not None) should be returned.
479

480
    If no nodes are needed, an empty list (and not None) should be returned.
481

482
    Note that if the HPATH for a LU class is None, this function will
483
    not be called.
484

485
    """
486
    raise NotImplementedError
487

    
488
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
489
    """Notify the LU about the results of its hooks.
490

491
    This method is called every time a hooks phase is executed, and notifies
492
    the Logical Unit about the hooks' result. The LU can then use it to alter
493
    its result based on the hooks.  By default the method does nothing and the
494
    previous result is passed back unchanged but any LU can define it if it
495
    wants to use the local cluster hook-scripts somehow.
496

497
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
498
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
499
    @param hook_results: the results of the multi-node hooks rpc call
500
    @param feedback_fn: function used to send feedback back to the caller
501
    @param lu_result: the previous Exec result this LU had, or None
502
        in the PRE phase
503
    @return: the new Exec result, based on the previous result
504
        and hook results
505

506
    """
507
    # API must be kept, thus we ignore the unused-argument and
508
    # could-be-a-function warnings
509
    # pylint: disable-msg=W0613,R0201
510
    return lu_result
511

    
512
  def _ExpandAndLockInstance(self):
513
    """Helper function to expand and lock an instance.
514

515
    Many LUs that work on an instance take its name in self.op.instance_name
516
    and need to expand it and then declare the expanded name for locking. This
517
    function does it, and then updates self.op.instance_name to the expanded
518
    name. It also initializes needed_locks as a dict, if this hasn't been done
519
    before.
520

521
    """
522
    if self.needed_locks is None:
523
      self.needed_locks = {}
524
    else:
525
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
526
        "_ExpandAndLockInstance called with instance-level locks set"
527
    self.op.instance_name = _ExpandInstanceName(self.cfg,
528
                                                self.op.instance_name)
529
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
530

    
531
  def _LockInstancesNodes(self, primary_only=False):
532
    """Helper function to declare instances' nodes for locking.
533

534
    This function should be called after locking one or more instances to lock
535
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
536
    with all primary or secondary nodes for instances already locked and
537
    present in self.needed_locks[locking.LEVEL_INSTANCE].
538

539
    It should be called from DeclareLocks, and for safety only works if
540
    self.recalculate_locks[locking.LEVEL_NODE] is set.
541

542
    In the future it may grow parameters to just lock some instances' nodes, or
543
    to just lock primaries or secondary nodes, if needed.
544

545
    It should be called in DeclareLocks in a way similar to::
546

547
      if level == locking.LEVEL_NODE:
548
        self._LockInstancesNodes()
549

550
    @type primary_only: boolean
551
    @param primary_only: only lock primary nodes of locked instances
552

553
    """
554
    assert locking.LEVEL_NODE in self.recalculate_locks, \
555
      "_LockInstancesNodes helper function called with no nodes to recalculate"
556

    
557
    # TODO: check if we've really been called with the instance locks held
558

    
559
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
560
    # future we might want to have different behaviors depending on the value
561
    # of self.recalculate_locks[locking.LEVEL_NODE]
562
    wanted_nodes = []
563
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
564
      instance = self.context.cfg.GetInstanceInfo(instance_name)
565
      wanted_nodes.append(instance.primary_node)
566
      if not primary_only:
567
        wanted_nodes.extend(instance.secondary_nodes)
568

    
569
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
570
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
571
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
572
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
573

    
574
    del self.recalculate_locks[locking.LEVEL_NODE]
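  # Illustrative pattern (an assumption, following the docstrings above):
  # an instance LU typically wires these helpers together as
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()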
575

    
576

    
577
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
578
  """Simple LU which runs no hooks.
579

580
  This LU is intended as a parent for other LogicalUnits which will
581
  run no hooks, in order to reduce duplicate code.
582

583
  """
584
  HPATH = None
585
  HTYPE = None
586

    
587
  def BuildHooksEnv(self):
588
    """Empty BuildHooksEnv for NoHooksLu.
589

590
    This just raises an error.
591

592
    """
593
    assert False, "BuildHooksEnv called for NoHooksLUs"
594

    
595

    
596
class Tasklet:
597
  """Tasklet base class.
598

599
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
600
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
601
  tasklets know nothing about locks.
602

603
  Subclasses must follow these rules:
604
    - Implement CheckPrereq
605
    - Implement Exec
606

607
  """
608
  def __init__(self, lu):
609
    self.lu = lu
610

    
611
    # Shortcuts
612
    self.cfg = lu.cfg
613
    self.rpc = lu.rpc
614

    
615
  def CheckPrereq(self):
616
    """Check prerequisites for this tasklets.
617

618
    This method should check whether the prerequisites for the execution of
619
    this tasklet are fulfilled. It can do internode communication, but it
620
    should be idempotent - no cluster or system changes are allowed.
621

622
    The method should raise errors.OpPrereqError in case something is not
623
    fulfilled. Its return value is ignored.
624

625
    This method should also update all parameters to their canonical form if it
626
    hasn't been done before.
627

628
    """
629
    pass
630

    
631
  def Exec(self, feedback_fn):
632
    """Execute the tasklet.
633

634
    This method should implement the actual work. It should raise
635
    errors.OpExecError for failures that are somewhat dealt with in code, or
636
    expected.
637

638
    """
639
    raise NotImplementedError
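  # Illustrative wiring (an assumption based on the LogicalUnit docstrings
  # above): a fully tasklet-based LU builds its tasklets in ExpandNames and
  # inherits CheckPrereq/Exec from LogicalUnit, e.g.
  #
  #   self.tasklets = [SomeTasklet(self, item) for item in work_items]
  #
  # where SomeTasklet and work_items are hypothetical names.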
640

    
641

    
642
def _GetWantedNodes(lu, nodes):
643
  """Returns list of checked and expanded node names.
644

645
  @type lu: L{LogicalUnit}
646
  @param lu: the logical unit on whose behalf we execute
647
  @type nodes: list
648
  @param nodes: list of node names or None for all nodes
649
  @rtype: list
650
  @return: the list of nodes, sorted
651
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
652

653
  """
654
  if not nodes:
655
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
656
      " non-empty list of nodes whose name is to be expanded.")
657

    
658
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
659
  return utils.NiceSort(wanted)
660

    
661

    
662
def _GetWantedInstances(lu, instances):
663
  """Returns list of checked and expanded instance names.
664

665
  @type lu: L{LogicalUnit}
666
  @param lu: the logical unit on whose behalf we execute
667
  @type instances: list
668
  @param instances: list of instance names or None for all instances
669
  @rtype: list
670
  @return: the list of instances, sorted
671
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
672
  @raise errors.OpPrereqError: if any of the passed instances is not found
673

674
  """
675
  if instances:
676
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
677
  else:
678
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
679
  return wanted
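# Usage sketch (illustrative): both helpers above canonicalise
# user-supplied short names, e.g.
#
#   >>> _GetWantedNodes(lu, ["node1"])      # doctest: +SKIP
#   ['node1.example.com']
#
# where "node1.example.com" is a hypothetical fully-qualified name.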
680

    
681

    
682
def _GetUpdatedParams(old_params, update_dict,
683
                      use_default=True, use_none=False):
684
  """Return the new version of a parameter dictionary.
685

686
  @type old_params: dict
687
  @param old_params: old parameters
688
  @type update_dict: dict
689
  @param update_dict: dict containing new parameter values, or
690
      constants.VALUE_DEFAULT to reset the parameter to its default
691
      value
692
  @type use_default: boolean
693
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
694
      values as 'to be deleted' values
695
  @type use_none: boolean
696
  @param use_none: whether to recognise C{None} values as 'to be
697
      deleted' values
698
  @rtype: dict
699
  @return: the new parameter dictionary
700

701
  """
702
  params_copy = copy.deepcopy(old_params)
703
  for key, val in update_dict.iteritems():
704
    if ((use_default and val == constants.VALUE_DEFAULT) or
705
        (use_none and val is None)):
706
      try:
707
        del params_copy[key]
708
      except KeyError:
709
        pass
710
    else:
711
      params_copy[key] = val
712
  return params_copy
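# Worked example (illustrative): with use_default=True, VALUE_DEFAULT
# entries are removed from the result and everything else overrides the
# old value:
#
#   >>> _GetUpdatedParams({"kernel_path": "/vmlinuz", "root_path": "/dev/sda"},
#   ...                   {"kernel_path": constants.VALUE_DEFAULT,
#   ...                    "root_path": "/dev/vda"})
#   {'root_path': '/dev/vda'}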
713

    
714

    
715
def _CheckOutputFields(static, dynamic, selected):
716
  """Checks whether all selected fields are valid.
717

718
  @type static: L{utils.FieldSet}
719
  @param static: static fields set
720
  @type dynamic: L{utils.FieldSet}
721
  @param dynamic: dynamic fields set
722

723
  """
724
  f = utils.FieldSet()
725
  f.Extend(static)
726
  f.Extend(dynamic)
727

    
728
  delta = f.NonMatching(selected)
729
  if delta:
730
    raise errors.OpPrereqError("Unknown output fields selected: %s"
731
                               % ",".join(delta), errors.ECODE_INVAL)
732

    
733

    
734
def _CheckGlobalHvParams(params):
735
  """Validates that given hypervisor params are not global ones.
736

737
  This will ensure that instances don't get customised versions of
738
  global params.
739

740
  """
741
  used_globals = constants.HVC_GLOBALS.intersection(params)
742
  if used_globals:
743
    msg = ("The following hypervisor parameters are global and cannot"
744
           " be customized at instance level, please modify them at"
745
           " cluster level: %s" % utils.CommaJoin(used_globals))
746
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
747

    
748

    
749
def _CheckNodeOnline(lu, node):
750
  """Ensure that a given node is online.
751

752
  @param lu: the LU on behalf of which we make the check
753
  @param node: the node to check
754
  @raise errors.OpPrereqError: if the node is offline
755

756
  """
757
  if lu.cfg.GetNodeInfo(node).offline:
758
    raise errors.OpPrereqError("Can't use offline node %s" % node,
759
                               errors.ECODE_INVAL)
760

    
761

    
762
def _CheckNodeNotDrained(lu, node):
763
  """Ensure that a given node is not drained.
764

765
  @param lu: the LU on behalf of which we make the check
766
  @param node: the node to check
767
  @raise errors.OpPrereqError: if the node is drained
768

769
  """
770
  if lu.cfg.GetNodeInfo(node).drained:
771
    raise errors.OpPrereqError("Can't use drained node %s" % node,
772
                               errors.ECODE_INVAL)
773

    
774

    
775
def _CheckNodeHasOS(lu, node, os_name, force_variant):
776
  """Ensure that a node supports a given OS.
777

778
  @param lu: the LU on behalf of which we make the check
779
  @param node: the node to check
780
  @param os_name: the OS to query about
781
  @param force_variant: whether to ignore variant errors
782
  @raise errors.OpPrereqError: if the node is not supporting the OS
783

784
  """
785
  result = lu.rpc.call_os_get(node, os_name)
786
  result.Raise("OS '%s' not in supported OS list for node %s" %
787
               (os_name, node),
788
               prereq=True, ecode=errors.ECODE_INVAL)
789
  if not force_variant:
790
    _CheckOSVariant(result.payload, os_name)
791

    
792

    
793
def _RequireFileStorage():
794
  """Checks that file storage is enabled.
795

796
  @raise errors.OpPrereqError: when file storage is disabled
797

798
  """
799
  if not constants.ENABLE_FILE_STORAGE:
800
    raise errors.OpPrereqError("File storage disabled at configure time",
801
                               errors.ECODE_INVAL)
802

    
803

    
804
def _CheckDiskTemplate(template):
805
  """Ensure a given disk template is valid.
806

807
  """
808
  if template not in constants.DISK_TEMPLATES:
809
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
810
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
811
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
812
  if template == constants.DT_FILE:
813
    _RequireFileStorage()
814
  return True
815

    
816

    
817
def _CheckStorageType(storage_type):
818
  """Ensure a given storage type is valid.
819

820
  """
821
  if storage_type not in constants.VALID_STORAGE_TYPES:
822
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
823
                               errors.ECODE_INVAL)
824
  if storage_type == constants.ST_FILE:
825
    _RequireFileStorage()
826
  return True
827

    
828

    
829
def _GetClusterDomainSecret():
830
  """Reads the cluster domain secret.
831

832
  """
833
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
834
                               strict=True)
835

    
836

    
837
def _CheckInstanceDown(lu, instance, reason):
838
  """Ensure that an instance is not running."""
839
  if instance.admin_up:
840
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
841
                               (instance.name, reason), errors.ECODE_STATE)
842

    
843
  pnode = instance.primary_node
844
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
845
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
846
              prereq=True, ecode=errors.ECODE_ENVIRON)
847

    
848
  if instance.name in ins_l.payload:
849
    raise errors.OpPrereqError("Instance %s is running, %s" %
850
                               (instance.name, reason), errors.ECODE_STATE)
851

    
852

    
853
def _ExpandItemName(fn, name, kind):
854
  """Expand an item name.
855

856
  @param fn: the function to use for expansion
857
  @param name: requested item name
858
  @param kind: text description ('Node' or 'Instance')
859
  @return: the resolved (full) name
860
  @raise errors.OpPrereqError: if the item is not found
861

862
  """
863
  full_name = fn(name)
864
  if full_name is None:
865
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
866
                               errors.ECODE_NOENT)
867
  return full_name
868

    
869

    
870
def _ExpandNodeName(cfg, name):
871
  """Wrapper over L{_ExpandItemName} for nodes."""
872
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
873

    
874

    
875
def _ExpandInstanceName(cfg, name):
876
  """Wrapper over L{_ExpandItemName} for instance."""
877
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
878

    
879

    
880
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
881
                          memory, vcpus, nics, disk_template, disks,
882
                          bep, hvp, hypervisor_name):
883
  """Builds instance related env variables for hooks
884

885
  This builds the hook environment from individual variables.
886

887
  @type name: string
888
  @param name: the name of the instance
889
  @type primary_node: string
890
  @param primary_node: the name of the instance's primary node
891
  @type secondary_nodes: list
892
  @param secondary_nodes: list of secondary nodes as strings
893
  @type os_type: string
894
  @param os_type: the name of the instance's OS
895
  @type status: boolean
896
  @param status: the should_run status of the instance
897
  @type memory: string
898
  @param memory: the memory size of the instance
899
  @type vcpus: string
900
  @param vcpus: the count of VCPUs the instance has
901
  @type nics: list
902
  @param nics: list of tuples (ip, mac, mode, link) representing
903
      the NICs the instance has
904
  @type disk_template: string
905
  @param disk_template: the disk template of the instance
906
  @type disks: list
907
  @param disks: the list of (size, mode) pairs
908
  @type bep: dict
909
  @param bep: the backend parameters for the instance
910
  @type hvp: dict
911
  @param hvp: the hypervisor parameters for the instance
912
  @type hypervisor_name: string
913
  @param hypervisor_name: the hypervisor for the instance
914
  @rtype: dict
915
  @return: the hook environment for this instance
916

917
  """
918
  if status:
919
    str_status = "up"
920
  else:
921
    str_status = "down"
922
  env = {
923
    "OP_TARGET": name,
924
    "INSTANCE_NAME": name,
925
    "INSTANCE_PRIMARY": primary_node,
926
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
927
    "INSTANCE_OS_TYPE": os_type,
928
    "INSTANCE_STATUS": str_status,
929
    "INSTANCE_MEMORY": memory,
930
    "INSTANCE_VCPUS": vcpus,
931
    "INSTANCE_DISK_TEMPLATE": disk_template,
932
    "INSTANCE_HYPERVISOR": hypervisor_name,
933
  }
934

    
935
  if nics:
936
    nic_count = len(nics)
937
    for idx, (ip, mac, mode, link) in enumerate(nics):
938
      if ip is None:
939
        ip = ""
940
      env["INSTANCE_NIC%d_IP" % idx] = ip
941
      env["INSTANCE_NIC%d_MAC" % idx] = mac
942
      env["INSTANCE_NIC%d_MODE" % idx] = mode
943
      env["INSTANCE_NIC%d_LINK" % idx] = link
944
      if mode == constants.NIC_MODE_BRIDGED:
945
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
946
  else:
947
    nic_count = 0
948

    
949
  env["INSTANCE_NIC_COUNT"] = nic_count
950

    
951
  if disks:
952
    disk_count = len(disks)
953
    for idx, (size, mode) in enumerate(disks):
954
      env["INSTANCE_DISK%d_SIZE" % idx] = size
955
      env["INSTANCE_DISK%d_MODE" % idx] = mode
956
  else:
957
    disk_count = 0
958

    
959
  env["INSTANCE_DISK_COUNT"] = disk_count
960

    
961
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
962
    for key, value in source.items():
963
      env["INSTANCE_%s_%s" % (kind, key)] = value
964

    
965
  return env
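# Illustrative result (abridged, an assumption about a concrete instance):
# for an instance with one bridged NIC and one disk the returned dict
# contains keys such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE,
# INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE, plus one
# INSTANCE_BE_<param> and INSTANCE_HV_<param> entry per backend/hypervisor
# parameter.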
966

    
967

    
968
def _NICListToTuple(lu, nics):
969
  """Build a list of nic information tuples.
970

971
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
972
  value in LUQueryInstanceData.
973

974
  @type lu:  L{LogicalUnit}
975
  @param lu: the logical unit on whose behalf we execute
976
  @type nics: list of L{objects.NIC}
977
  @param nics: list of nics to convert to hooks tuples
978

979
  """
980
  hooks_nics = []
981
  cluster = lu.cfg.GetClusterInfo()
982
  for nic in nics:
983
    ip = nic.ip
984
    mac = nic.mac
985
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
986
    mode = filled_params[constants.NIC_MODE]
987
    link = filled_params[constants.NIC_LINK]
988
    hooks_nics.append((ip, mac, mode, link))
989
  return hooks_nics
990

    
991

    
992
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
993
  """Builds instance related env variables for hooks from an object.
994

995
  @type lu: L{LogicalUnit}
996
  @param lu: the logical unit on whose behalf we execute
997
  @type instance: L{objects.Instance}
998
  @param instance: the instance for which we should build the
999
      environment
1000
  @type override: dict
1001
  @param override: dictionary with key/values that will override
1002
      our values
1003
  @rtype: dict
1004
  @return: the hook environment dictionary
1005

1006
  """
1007
  cluster = lu.cfg.GetClusterInfo()
1008
  bep = cluster.FillBE(instance)
1009
  hvp = cluster.FillHV(instance)
1010
  args = {
1011
    'name': instance.name,
1012
    'primary_node': instance.primary_node,
1013
    'secondary_nodes': instance.secondary_nodes,
1014
    'os_type': instance.os,
1015
    'status': instance.admin_up,
1016
    'memory': bep[constants.BE_MEMORY],
1017
    'vcpus': bep[constants.BE_VCPUS],
1018
    'nics': _NICListToTuple(lu, instance.nics),
1019
    'disk_template': instance.disk_template,
1020
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1021
    'bep': bep,
1022
    'hvp': hvp,
1023
    'hypervisor_name': instance.hypervisor,
1024
  }
1025
  if override:
1026
    args.update(override)
1027
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1028

    
1029

    
1030
def _AdjustCandidatePool(lu, exceptions):
1031
  """Adjust the candidate pool after node operations.
1032

1033
  """
1034
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1035
  if mod_list:
1036
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1037
               utils.CommaJoin(node.name for node in mod_list))
1038
    for name in mod_list:
1039
      lu.context.ReaddNode(name)
1040
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1041
  if mc_now > mc_max:
1042
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1043
               (mc_now, mc_max))
1044

    
1045

    
1046
def _DecideSelfPromotion(lu, exceptions=None):
1047
  """Decide whether I should promote myself as a master candidate.
1048

1049
  """
1050
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1051
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052
  # the new node will increase mc_max with one, so:
1053
  mc_should = min(mc_should + 1, cp_size)
1054
  return mc_now < mc_should
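# Worked example (illustrative numbers): with candidate_pool_size=10,
# mc_now=3 current candidates and mc_should=3 before adding this node,
# mc_should becomes min(3 + 1, 10) = 4, and since 3 < 4 the new node
# promotes itself to master candidate.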
1055

    
1056

    
1057
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1058
  """Check that the brigdes needed by a list of nics exist.
1059

1060
  """
1061
  cluster = lu.cfg.GetClusterInfo()
1062
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1063
  brlist = [params[constants.NIC_LINK] for params in paramslist
1064
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1065
  if brlist:
1066
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1067
    result.Raise("Error checking bridges on destination node '%s'" %
1068
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1069

    
1070

    
1071
def _CheckInstanceBridgesExist(lu, instance, node=None):
1072
  """Check that the brigdes needed by an instance exist.
1073

1074
  """
1075
  if node is None:
1076
    node = instance.primary_node
1077
  _CheckNicsBridgesExist(lu, instance.nics, node)
1078

    
1079

    
1080
def _CheckOSVariant(os_obj, name):
1081
  """Check whether an OS name conforms to the os variants specification.
1082

1083
  @type os_obj: L{objects.OS}
1084
  @param os_obj: OS object to check
1085
  @type name: string
1086
  @param name: OS name passed by the user, to check for validity
1087

1088
  """
1089
  if not os_obj.supported_variants:
1090
    return
1091
  variant = objects.OS.GetVariant(name)
1092
  if not variant:
1093
    raise errors.OpPrereqError("OS name must include a variant",
1094
                               errors.ECODE_INVAL)
1095

    
1096
  if variant not in os_obj.supported_variants:
1097
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1098

    
1099

    
1100
def _GetNodeInstancesInner(cfg, fn):
1101
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1102

    
1103

    
1104
def _GetNodeInstances(cfg, node_name):
1105
  """Returns a list of all primary and secondary instances on a node.
1106

1107
  """
1108

    
1109
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1110

    
1111

    
1112
def _GetNodePrimaryInstances(cfg, node_name):
1113
  """Returns primary instances on a node.
1114

1115
  """
1116
  return _GetNodeInstancesInner(cfg,
1117
                                lambda inst: node_name == inst.primary_node)
1118

    
1119

    
1120
def _GetNodeSecondaryInstances(cfg, node_name):
1121
  """Returns secondary instances on a node.
1122

1123
  """
1124
  return _GetNodeInstancesInner(cfg,
1125
                                lambda inst: node_name in inst.secondary_nodes)
1126

    
1127

    
1128
def _GetStorageTypeArgs(cfg, storage_type):
1129
  """Returns the arguments for a storage type.
1130

1131
  """
1132
  # Special case for file storage
1133
  if storage_type == constants.ST_FILE:
1134
    # storage.FileStorage wants a list of storage directories
1135
    return [[cfg.GetFileStorageDir()]]
1136

    
1137
  return []
1138

    
1139

    
1140
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1141
  faulty = []
1142

    
1143
  for dev in instance.disks:
1144
    cfg.SetDiskID(dev, node_name)
1145

    
1146
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1147
  result.Raise("Failed to get disk status from node %s" % node_name,
1148
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1149

    
1150
  for idx, bdev_status in enumerate(result.payload):
1151
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1152
      faulty.append(idx)
1153

    
1154
  return faulty
1155

    
1156

    
1157
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1158
  """Check the sanity of iallocator and node arguments and use the
1159
  cluster-wide iallocator if appropriate.
1160

1161
  Check that at most one of (iallocator, node) is specified. If none is
1162
  specified, then the LU's opcode's iallocator slot is filled with the
1163
  cluster-wide default iallocator.
1164

1165
  @type iallocator_slot: string
1166
  @param iallocator_slot: the name of the opcode iallocator slot
1167
  @type node_slot: string
1168
  @param node_slot: the name of the opcode target node slot
1169

1170
  """
1171
  node = getattr(lu.op, node_slot, None)
1172
  iallocator = getattr(lu.op, iallocator_slot, None)
1173

    
1174
  if node is not None and iallocator is not None:
1175
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1176
                               errors.ECODE_INVAL)
1177
  elif node is None and iallocator is None:
1178
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1179
    if default_iallocator:
1180
      setattr(lu.op, iallocator_slot, default_iallocator)
1181
    else:
1182
      raise errors.OpPrereqError("No iallocator or node given and no"
1183
                                 " cluster-wide default iallocator found."
1184
                                 " Please specify either an iallocator or a"
1185
                                 " node, or set a cluster-wide default"
1186
                                 " iallocator.")
1187

    
1188

    
1189
class LUPostInitCluster(LogicalUnit):
1190
  """Logical unit for running hooks after cluster initialization.
1191

1192
  """
1193
  HPATH = "cluster-init"
1194
  HTYPE = constants.HTYPE_CLUSTER
1195

    
1196
  def BuildHooksEnv(self):
1197
    """Build hooks env.
1198

1199
    """
1200
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1201
    mn = self.cfg.GetMasterNode()
1202
    return env, [], [mn]
1203

    
1204
  def Exec(self, feedback_fn):
1205
    """Nothing to do.
1206

1207
    """
1208
    return True
1209

    
1210

    
1211
class LUDestroyCluster(LogicalUnit):
1212
  """Logical unit for destroying the cluster.
1213

1214
  """
1215
  HPATH = "cluster-destroy"
1216
  HTYPE = constants.HTYPE_CLUSTER
1217

    
1218
  def BuildHooksEnv(self):
1219
    """Build hooks env.
1220

1221
    """
1222
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1223
    return env, [], []
1224

    
1225
  def CheckPrereq(self):
1226
    """Check prerequisites.
1227

1228
    This checks whether the cluster is empty.
1229

1230
    Any errors are signaled by raising errors.OpPrereqError.
1231

1232
    """
1233
    master = self.cfg.GetMasterNode()
1234

    
1235
    nodelist = self.cfg.GetNodeList()
1236
    if len(nodelist) != 1 or nodelist[0] != master:
1237
      raise errors.OpPrereqError("There are still %d node(s) in"
1238
                                 " this cluster." % (len(nodelist) - 1),
1239
                                 errors.ECODE_INVAL)
1240
    instancelist = self.cfg.GetInstanceList()
1241
    if instancelist:
1242
      raise errors.OpPrereqError("There are still %d instance(s) in"
1243
                                 " this cluster." % len(instancelist),
1244
                                 errors.ECODE_INVAL)
1245

    
1246
  def Exec(self, feedback_fn):
1247
    """Destroys the cluster.
1248

1249
    """
1250
    master = self.cfg.GetMasterNode()
1251
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1252

    
1253
    # Run post hooks on master node before it's removed
1254
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1255
    try:
1256
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1257
    except:
1258
      # pylint: disable-msg=W0702
1259
      self.LogWarning("Errors occurred running hooks on %s" % master)
1260

    
1261
    result = self.rpc.call_node_stop_master(master, False)
1262
    result.Raise("Could not disable the master role")
1263

    
1264
    if modify_ssh_setup:
1265
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1266
      utils.CreateBackup(priv_key)
1267
      utils.CreateBackup(pub_key)
1268

    
1269
    return master
1270

    
1271

    
1272
def _VerifyCertificate(filename):
1273
  """Verifies a certificate for LUVerifyCluster.
1274

1275
  @type filename: string
1276
  @param filename: Path to PEM file
1277

1278
  """
1279
  try:
1280
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1281
                                           utils.ReadFile(filename))
1282
  except Exception, err: # pylint: disable-msg=W0703
1283
    return (LUVerifyCluster.ETYPE_ERROR,
1284
            "Failed to load X509 certificate %s: %s" % (filename, err))
1285

    
1286
  (errcode, msg) = \
1287
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1288
                                constants.SSL_CERT_EXPIRATION_ERROR)
1289

    
1290
  if msg:
1291
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1292
  else:
1293
    fnamemsg = None
1294

    
1295
  if errcode is None:
1296
    return (None, fnamemsg)
1297
  elif errcode == utils.CERT_WARNING:
1298
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1299
  elif errcode == utils.CERT_ERROR:
1300
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1301

    
1302
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1303

    
1304

    
1305
class LUVerifyCluster(LogicalUnit):
1306
  """Verifies the cluster status.
1307

1308
  """
1309
  HPATH = "cluster-verify"
1310
  HTYPE = constants.HTYPE_CLUSTER
1311
  _OP_PARAMS = [
1312
    ("skip_checks", _EmptyList,
1313
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1314
    ("verbose", False, _TBool),
1315
    ("error_codes", False, _TBool),
1316
    ("debug_simulate_errors", False, _TBool),
1317
    ]
1318
  REQ_BGL = False
1319

    
1320
  TCLUSTER = "cluster"
1321
  TNODE = "node"
1322
  TINSTANCE = "instance"
1323

    
1324
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1325
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1326
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1327
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1328
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1329
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1331
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1332
  ENODEDRBD = (TNODE, "ENODEDRBD")
1333
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1334
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1335
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1336
  ENODEHV = (TNODE, "ENODEHV")
1337
  ENODELVM = (TNODE, "ENODELVM")
1338
  ENODEN1 = (TNODE, "ENODEN1")
1339
  ENODENET = (TNODE, "ENODENET")
1340
  ENODEOS = (TNODE, "ENODEOS")
1341
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1342
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1343
  ENODERPC = (TNODE, "ENODERPC")
1344
  ENODESSH = (TNODE, "ENODESSH")
1345
  ENODEVERSION = (TNODE, "ENODEVERSION")
1346
  ENODESETUP = (TNODE, "ENODESETUP")
1347
  ENODETIME = (TNODE, "ENODETIME")
1348

    
1349
  ETYPE_FIELD = "code"
1350
  ETYPE_ERROR = "ERROR"
1351
  ETYPE_WARNING = "WARNING"
1352

    
1353
  class NodeImage(object):
1354
    """A class representing the logical and physical status of a node.
1355

1356
    @type name: string
1357
    @ivar name: the node name to which this object refers
1358
    @ivar volumes: a structure as returned from
1359
        L{ganeti.backend.GetVolumeList} (runtime)
1360
    @ivar instances: a list of running instances (runtime)
1361
    @ivar pinst: list of configured primary instances (config)
1362
    @ivar sinst: list of configured secondary instances (config)
1363
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1364
        of this node (config)
1365
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1366
    @ivar dfree: free disk, as reported by the node (runtime)
1367
    @ivar offline: the offline status (config)
1368
    @type rpc_fail: boolean
1369
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1370
        not whether the individual keys were correct) (runtime)
1371
    @type lvm_fail: boolean
1372
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1373
    @type hyp_fail: boolean
1374
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1375
    @type ghost: boolean
1376
    @ivar ghost: whether this is a known node or not (config)
1377
    @type os_fail: boolean
1378
    @ivar os_fail: whether the RPC call didn't return valid OS data
1379
    @type oslist: list
1380
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1381

1382
    """
1383
    def __init__(self, offline=False, name=None):
1384
      self.name = name
1385
      self.volumes = {}
1386
      self.instances = []
1387
      self.pinst = []
1388
      self.sinst = []
1389
      self.sbp = {}
1390
      self.mfree = 0
1391
      self.dfree = 0
1392
      self.offline = offline
1393
      self.rpc_fail = False
1394
      self.lvm_fail = False
1395
      self.hyp_fail = False
1396
      self.ghost = False
1397
      self.os_fail = False
1398
      self.oslist = {}
1399

    
1400
  def ExpandNames(self):
1401
    self.needed_locks = {
1402
      locking.LEVEL_NODE: locking.ALL_SET,
1403
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1404
    }
1405
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1406

    
1407
  def _Error(self, ecode, item, msg, *args, **kwargs):
1408
    """Format an error message.
1409

1410
    Based on the opcode's error_codes parameter, either format a
1411
    parseable error code, or a simpler error string.
1412

1413
    This must be called only from Exec and functions called from Exec.
1414

1415
    """
1416
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1417
    itype, etxt = ecode
1418
    # first complete the msg
1419
    if args:
1420
      msg = msg % args
1421
    # then format the whole message
1422
    if self.op.error_codes:
1423
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1424
    else:
1425
      if item:
1426
        item = " " + item
1427
      else:
1428
        item = ""
1429
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1430
    # and finally report it via the feedback_fn
1431
    self._feedback_fn("  - %s" % msg)
1432

    
1433
  def _ErrorIf(self, cond, *args, **kwargs):
1434
    """Log an error message if the passed condition is True.
1435

1436
    """
1437
    cond = bool(cond) or self.op.debug_simulate_errors
1438
    if cond:
1439
      self._Error(*args, **kwargs)
1440
    # only mark the operation as failed for ERROR cases, not for warnings
1441
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1442
      self.bad = self.bad or cond
1443

    
1444
  def _VerifyNode(self, ninfo, nresult):
1445
    """Perform some basic validation on data returned from a node.
1446

1447
      - check the result data structure is well formed and has all the
1448
        mandatory fields
1449
      - check ganeti version
1450

1451
    @type ninfo: L{objects.Node}
1452
    @param ninfo: the node to check
1453
    @param nresult: the results from the node
1454
    @rtype: boolean
1455
    @return: whether overall this call was successful (and we can expect
1456
         reasonable values in the response)
1457

1458
    """
1459
    node = ninfo.name
1460
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1461

    
1462
    # main result, nresult should be a non-empty dict
1463
    test = not nresult or not isinstance(nresult, dict)
1464
    _ErrorIf(test, self.ENODERPC, node,
1465
                  "unable to verify node: no data returned")
1466
    if test:
1467
      return False
1468

    
1469
    # compares ganeti version
1470
    local_version = constants.PROTOCOL_VERSION
1471
    remote_version = nresult.get("version", None)
1472
    test = not (remote_version and
1473
                isinstance(remote_version, (list, tuple)) and
1474
                len(remote_version) == 2)
1475
    _ErrorIf(test, self.ENODERPC, node,
1476
             "connection to node returned invalid data")
1477
    if test:
1478
      return False
1479

    
1480
    test = local_version != remote_version[0]
1481
    _ErrorIf(test, self.ENODEVERSION, node,
1482
             "incompatible protocol versions: master %s,"
1483
             " node %s", local_version, remote_version[0])
1484
    if test:
1485
      return False
1486

    
1487
    # node seems compatible, we can actually try to look into its results
1488

    
1489
    # full package version
1490
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1491
                  self.ENODEVERSION, node,
1492
                  "software version mismatch: master %s, node %s",
1493
                  constants.RELEASE_VERSION, remote_version[1],
1494
                  code=self.ETYPE_WARNING)
1495

    
1496
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1497
    if isinstance(hyp_result, dict):
1498
      for hv_name, hv_result in hyp_result.iteritems():
1499
        test = hv_result is not None
1500
        _ErrorIf(test, self.ENODEHV, node,
1501
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1502

    
1503

    
1504
    test = nresult.get(constants.NV_NODESETUP,
1505
                           ["Missing NODESETUP results"])
1506
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1507
             "; ".join(test))
1508

    
1509
    return True
1510

    
1511
  def _VerifyNodeTime(self, ninfo, nresult,
1512
                      nvinfo_starttime, nvinfo_endtime):
1513
    """Check the node time.
1514

1515
    @type ninfo: L{objects.Node}
1516
    @param ninfo: the node to check
1517
    @param nresult: the remote results for the node
1518
    @param nvinfo_starttime: the start time of the RPC call
1519
    @param nvinfo_endtime: the end time of the RPC call
1520

1521
    """
1522
    node = ninfo.name
1523
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1524

    
1525
    ntime = nresult.get(constants.NV_TIME, None)
1526
    try:
1527
      ntime_merged = utils.MergeTime(ntime)
1528
    except (ValueError, TypeError):
1529
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1530
      return
1531

    
1532
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1533
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1534
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1535
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1536
    else:
1537
      ntime_diff = None
1538

    
1539
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1540
             "Node time diverges by at least %s from master node time",
1541
             ntime_diff)
1542

    
1543
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1544
    """Check the node time.
1545

1546
    @type ninfo: L{objects.Node}
1547
    @param ninfo: the node to check
1548
    @param nresult: the remote results for the node
1549
    @param vg_name: the configured VG name
1550

1551
    """
1552
    if vg_name is None:
1553
      return
1554

    
1555
    node = ninfo.name
1556
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1557

    
1558
    # checks vg existence and size > 20G
1559
    vglist = nresult.get(constants.NV_VGLIST, None)
1560
    test = not vglist
1561
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1562
    if not test:
1563
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1564
                                            constants.MIN_VG_SIZE)
1565
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1566

    
1567
    # check pv names
1568
    pvlist = nresult.get(constants.NV_PVLIST, None)
1569
    test = pvlist is None
1570
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1571
    if not test:
1572
      # check that ':' is not present in PV names, since it's a
1573
      # special character for lvcreate (denotes the range of PEs to
1574
      # use on the PV)
1575
      for _, pvname, owner_vg in pvlist:
1576
        test = ":" in pvname
1577
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1578
                 " '%s' of VG '%s'", pvname, owner_vg)
1579

    
1580
  def _VerifyNodeNetwork(self, ninfo, nresult):
1581
    """Check the node time.
1582

1583
    @type ninfo: L{objects.Node}
1584
    @param ninfo: the node to check
1585
    @param nresult: the remote results for the node
1586

1587
    """
1588
    node = ninfo.name
1589
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1590

    
1591
    test = constants.NV_NODELIST not in nresult
1592
    _ErrorIf(test, self.ENODESSH, node,
1593
             "node hasn't returned node ssh connectivity data")
1594
    if not test:
1595
      if nresult[constants.NV_NODELIST]:
1596
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1597
          _ErrorIf(True, self.ENODESSH, node,
1598
                   "ssh communication with node '%s': %s", a_node, a_msg)
1599

    
1600
    test = constants.NV_NODENETTEST not in nresult
1601
    _ErrorIf(test, self.ENODENET, node,
1602
             "node hasn't returned node tcp connectivity data")
1603
    if not test:
1604
      if nresult[constants.NV_NODENETTEST]:
1605
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1606
        for anode in nlist:
1607
          _ErrorIf(True, self.ENODENET, node,
1608
                   "tcp communication with node '%s': %s",
1609
                   anode, nresult[constants.NV_NODENETTEST][anode])
1610

    
1611
    test = constants.NV_MASTERIP not in nresult
1612
    _ErrorIf(test, self.ENODENET, node,
1613
             "node hasn't returned node master IP reachability data")
1614
    if not test:
1615
      if not nresult[constants.NV_MASTERIP]:
1616
        if node == self.master_node:
1617
          msg = "the master node cannot reach the master IP (not configured?)"
1618
        else:
1619
          msg = "cannot reach the master IP"
1620
        _ErrorIf(True, self.ENODENET, node, msg)
1621

    
1622

    
1623
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1624
    """Verify an instance.
1625

1626
    This function checks to see if the required block devices are
1627
    available on the instance's node.
1628

1629
    """
1630
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1631
    node_current = instanceconfig.primary_node
1632

    
1633
    node_vol_should = {}
1634
    instanceconfig.MapLVsByNode(node_vol_should)
1635

    
1636
    for node in node_vol_should:
1637
      n_img = node_image[node]
1638
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1639
        # ignore missing volumes on offline or broken nodes
1640
        continue
1641
      for volume in node_vol_should[node]:
1642
        test = volume not in n_img.volumes
1643
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1644
                 "volume %s missing on node %s", volume, node)
1645

    
1646
    if instanceconfig.admin_up:
1647
      pri_img = node_image[node_current]
1648
      test = instance not in pri_img.instances and not pri_img.offline
1649
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1650
               "instance not running on its primary node %s",
1651
               node_current)
1652

    
1653
    for node, n_img in node_image.items():
1654
      if node != node_current:
1655
        test = instance in n_img.instances
1656
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1657
                 "instance should not run on node %s", node)
1658

    
1659
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1660
    """Verify if there are any unknown volumes in the cluster.
1661

1662
    The .os, .swap and backup volumes are ignored. All other volumes are
1663
    reported as unknown.
1664

1665
    @type reserved: L{ganeti.utils.FieldSet}
1666
    @param reserved: a FieldSet of reserved volume names
1667

1668
    """
1669
    for node, n_img in node_image.items():
1670
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1671
        # skip non-healthy nodes
1672
        continue
1673
      for volume in n_img.volumes:
1674
        test = ((node not in node_vol_should or
1675
                volume not in node_vol_should[node]) and
1676
                not reserved.Matches(volume))
1677
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1678
                      "volume %s is unknown", volume)
1679

    
1680
  def _VerifyOrphanInstances(self, instancelist, node_image):
1681
    """Verify the list of running instances.
1682

1683
    This checks what instances are running but unknown to the cluster.
1684

1685
    """
1686
    for node, n_img in node_image.items():
1687
      for o_inst in n_img.instances:
1688
        test = o_inst not in instancelist
1689
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1690
                      "instance %s on node %s should not exist", o_inst, node)
1691

    
1692
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1693
    """Verify N+1 Memory Resilience.
1694

1695
    Check that if one single node dies we can still start all the
1696
    instances it was primary for.
1697

1698
    """
1699
    for node, n_img in node_image.items():
1700
      # This code checks that every node which is now listed as
1701
      # secondary has enough memory to host all instances it is
1702
      # supposed to, should a single other node in the cluster fail.
1703
      # FIXME: not ready for failover to an arbitrary node
1704
      # FIXME: does not support file-backed instances
1705
      # WARNING: we currently take into account down instances as well
1706
      # as up ones, considering that even if they're down someone
1707
      # might want to start them even in the event of a node failure.
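      # Illustration (hypothetical numbers): if this node is secondary for
      # instances i1 and i2 whose primary is node A, both auto-balanced with
      # BE_MEMORY of 1024 and 512, then needed_mem for prinode A is 1536 and
      # the check fails unless this node reports mfree >= 1536.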
1708
      for prinode, instances in n_img.sbp.items():
1709
        needed_mem = 0
1710
        for instance in instances:
1711
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1712
          if bep[constants.BE_AUTO_BALANCE]:
1713
            needed_mem += bep[constants.BE_MEMORY]
1714
        test = n_img.mfree < needed_mem
1715
        self._ErrorIf(test, self.ENODEN1, node,
1716
                      "not enough memory on to accommodate"
1717
                      " failovers should peer node %s fail", prinode)
1718

    
1719
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1720
                       master_files):
1721
    """Verifies and computes the node required file checksums.
1722

1723
    @type ninfo: L{objects.Node}
1724
    @param ninfo: the node to check
1725
    @param nresult: the remote results for the node
1726
    @param file_list: required list of files
1727
    @param local_cksum: dictionary of local files and their checksums
1728
    @param master_files: list of files that only masters should have
1729

1730
    """
1731
    node = ninfo.name
1732
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1733

    
1734
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1735
    test = not isinstance(remote_cksum, dict)
1736
    _ErrorIf(test, self.ENODEFILECHECK, node,
1737
             "node hasn't returned file checksum data")
1738
    if test:
1739
      return
1740

    
1741
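    # Files listed in master_files only have to be present on master
    # candidates; every other file in file_list must exist (and match the
    # master's checksum) on all nodes.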
    for file_name in file_list:
1742
      node_is_mc = ninfo.master_candidate
1743
      must_have = (file_name not in master_files) or node_is_mc
1744
      # missing
1745
      test1 = file_name not in remote_cksum
1746
      # invalid checksum
1747
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1748
      # existing and good
1749
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1750
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1751
               "file '%s' missing", file_name)
1752
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1753
               "file '%s' has wrong checksum", file_name)
1754
      # not candidate and this is not a must-have file
1755
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1756
               "file '%s' should not exist on non master"
1757
               " candidates (and the file is outdated)", file_name)
1758
      # all good, except non-master/non-must have combination
1759
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1760
               "file '%s' should not exist"
1761
               " on non master candidates", file_name)
1762

    
1763
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1764
                      drbd_map):
1765
    """Verifies and the node DRBD status.
1766

1767
    @type ninfo: L{objects.Node}
1768
    @param ninfo: the node to check
1769
    @param nresult: the remote results for the node
1770
    @param instanceinfo: the dict of instances
1771
    @param drbd_helper: the configured DRBD usermode helper
1772
    @param drbd_map: the DRBD map as returned by
1773
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1774

1775
    """
1776
    node = ninfo.name
1777
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1778

    
1779
    if drbd_helper:
1780
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1781
      test = (helper_result is None)
1782
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1783
               "no drbd usermode helper returned")
1784
      if helper_result:
1785
        status, payload = helper_result
1786
        test = not status
1787
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1788
                 "drbd usermode helper check unsuccessful: %s", payload)
1789
        test = status and (payload != drbd_helper)
1790
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1791
                 "wrong drbd usermode helper: %s", payload)
1792

    
1793
    # compute the DRBD minors
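    # node_drbd maps every minor the configuration assigns to this node to a
    # (instance name, must-be-in-use) pair; ghost instances get False so that
    # only a single warning is emitted for them.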
1794
    node_drbd = {}
1795
    for minor, instance in drbd_map[node].items():
1796
      test = instance not in instanceinfo
1797
      _ErrorIf(test, self.ECLUSTERCFG, None,
1798
               "ghost instance '%s' in temporary DRBD map", instance)
1799
      # ghost instance should not be running, but otherwise we
1800
      # don't give double warnings (both ghost instance and
1801
      # unallocated minor in use)
1802
      if test:
1803
        node_drbd[minor] = (instance, False)
1804
      else:
1805
        instance = instanceinfo[instance]
1806
        node_drbd[minor] = (instance.name, instance.admin_up)
1807

    
1808
    # and now check them
1809
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1810
    test = not isinstance(used_minors, (tuple, list))
1811
    _ErrorIf(test, self.ENODEDRBD, node,
1812
             "cannot parse drbd status file: %s", str(used_minors))
1813
    if test:
1814
      # we cannot check drbd status
1815
      return
1816

    
1817
    for minor, (iname, must_exist) in node_drbd.items():
1818
      test = minor not in used_minors and must_exist
1819
      _ErrorIf(test, self.ENODEDRBD, node,
1820
               "drbd minor %d of instance %s is not active", minor, iname)
1821
    for minor in used_minors:
1822
      test = minor not in node_drbd
1823
      _ErrorIf(test, self.ENODEDRBD, node,
1824
               "unallocated drbd minor %d is in use", minor)
1825

    
1826
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1827
    """Builds the node OS structures.
1828

1829
    @type ninfo: L{objects.Node}
1830
    @param ninfo: the node to check
1831
    @param nresult: the remote results for the node
1832
    @param nimg: the node image object
1833

1834
    """
1835
    node = ninfo.name
1836
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1837

    
1838
    remote_os = nresult.get(constants.NV_OSLIST, None)
1839
    test = (not isinstance(remote_os, list) or
1840
            not compat.all(isinstance(v, list) and len(v) == 7
1841
                           for v in remote_os))
1842

    
1843
    _ErrorIf(test, self.ENODEOS, node,
1844
             "node hasn't returned valid OS data")
1845

    
1846
    nimg.os_fail = test
1847

    
1848
    if test:
1849
      return
1850

    
1851
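    # os_dict maps each OS name to a list of
    # (path, status, diagnose, variants set, parameters set, api versions set)
    # tuples, one entry per occurrence of that OS on the node.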
    os_dict = {}
1852

    
1853
    for (name, os_path, status, diagnose,
1854
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1855

    
1856
      if name not in os_dict:
1857
        os_dict[name] = []
1858

    
1859
      # parameters is a list of lists instead of list of tuples due to
1860
      # JSON lacking a real tuple type, fix it:
1861
      parameters = [tuple(v) for v in parameters]
1862
      os_dict[name].append((os_path, status, diagnose,
1863
                            set(variants), set(parameters), set(api_ver)))
1864

    
1865
    nimg.oslist = os_dict
1866

    
1867
  def _VerifyNodeOS(self, ninfo, nimg, base):
1868
    """Verifies the node OS list.
1869

1870
    @type ninfo: L{objects.Node}
1871
    @param ninfo: the node to check
1872
    @param nimg: the node image object
1873
    @param base: the 'template' node we match against (e.g. from the master)
1874

1875
    """
1876
    node = ninfo.name
1877
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1878

    
1879
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1880

    
1881
    for os_name, os_data in nimg.oslist.items():
1882
      assert os_data, "Empty OS status for OS %s?!" % os_name
1883
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1884
      _ErrorIf(not f_status, self.ENODEOS, node,
1885
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1886
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1887
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1888
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1889
      # this will be caught in the backend too
1890
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1891
               and not f_var, self.ENODEOS, node,
1892
               "OS %s with API at least %d does not declare any variant",
1893
               os_name, constants.OS_API_V15)
1894
      # comparisons with the 'base' image
1895
      test = os_name not in base.oslist
1896
      _ErrorIf(test, self.ENODEOS, node,
1897
               "Extra OS %s not present on reference node (%s)",
1898
               os_name, base.name)
1899
      if test:
1900
        continue
1901
      assert base.oslist[os_name], "Base node has empty OS status?"
1902
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1903
      if not b_status:
1904
        # base OS is invalid, skipping
1905
        continue
1906
      for kind, a, b in [("API version", f_api, b_api),
1907
                         ("variants list", f_var, b_var),
1908
                         ("parameters", f_param, b_param)]:
1909
        _ErrorIf(a != b, self.ENODEOS, node,
1910
                 "OS %s %s differs from reference node %s: %s vs. %s",
1911
                 kind, os_name, base.name,
1912
                 utils.CommaJoin(a), utils.CommaJoin(b))
1913

    
1914
    # check any missing OSes
1915
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1916
    _ErrorIf(missing, self.ENODEOS, node,
1917
             "OSes present on reference node %s but missing on this node: %s",
1918
             base.name, utils.CommaJoin(missing))
1919

    
1920
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1921
    """Verifies and updates the node volume data.
1922

1923
    This function will update a L{NodeImage}'s internal structures
1924
    with data from the remote call.
1925

1926
    @type ninfo: L{objects.Node}
1927
    @param ninfo: the node to check
1928
    @param nresult: the remote results for the node
1929
    @param nimg: the node image object
1930
    @param vg_name: the configured VG name
1931

1932
    """
1933
    node = ninfo.name
1934
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1935

    
1936
    nimg.lvm_fail = True
1937
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1938
    if vg_name is None:
1939
      pass
1940
    elif isinstance(lvdata, basestring):
1941
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1942
               utils.SafeEncode(lvdata))
1943
    elif not isinstance(lvdata, dict):
1944
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1945
    else:
1946
      nimg.volumes = lvdata
1947
      nimg.lvm_fail = False
1948

    
1949
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1950
    """Verifies and updates the node instance list.
1951

1952
    If the listing was successful, then updates this node's instance
1953
    list. Otherwise, it marks the RPC call as failed for the instance
1954
    list key.
1955

1956
    @type ninfo: L{objects.Node}
1957
    @param ninfo: the node to check
1958
    @param nresult: the remote results for the node
1959
    @param nimg: the node image object
1960

1961
    """
1962
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1963
    test = not isinstance(idata, list)
1964
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1965
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1966
    if test:
1967
      nimg.hyp_fail = True
1968
    else:
1969
      nimg.instances = idata
1970

    
1971
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1972
    """Verifies and computes a node information map
1973

1974
    @type ninfo: L{objects.Node}
1975
    @param ninfo: the node to check
1976
    @param nresult: the remote results for the node
1977
    @param nimg: the node image object
1978
    @param vg_name: the configured VG name
1979

1980
    """
1981
    node = ninfo.name
1982
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1983

    
1984
    # try to read free memory (from the hypervisor)
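    # "memory_free" is expected to be an integer amount (conventionally MiB);
    # anything else is reported as an RPC error below.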
1985
    hv_info = nresult.get(constants.NV_HVINFO, None)
1986
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1987
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1988
    if not test:
1989
      try:
1990
        nimg.mfree = int(hv_info["memory_free"])
1991
      except (ValueError, TypeError):
1992
        _ErrorIf(True, self.ENODERPC, node,
1993
                 "node returned invalid nodeinfo, check hypervisor")
1994

    
1995
    # FIXME: devise a free space model for file based instances as well
1996
    if vg_name is not None:
1997
      test = (constants.NV_VGLIST not in nresult or
1998
              vg_name not in nresult[constants.NV_VGLIST])
1999
      _ErrorIf(test, self.ENODELVM, node,
2000
               "node didn't return data for the volume group '%s'"
2001
               " - it is either missing or broken", vg_name)
2002
      if not test:
2003
        try:
2004
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2005
        except (ValueError, TypeError):
2006
          _ErrorIf(True, self.ENODERPC, node,
2007
                   "node returned invalid LVM info, check LVM status")
2008

    
2009
  def BuildHooksEnv(self):
2010
    """Build hooks env.
2011

2012
    Cluster-Verify hooks are run only in the post phase; if they fail, their
2013
    output is logged in the verify output and the verification fails.
2014

2015
    """
2016
    all_nodes = self.cfg.GetNodeList()
2017
    env = {
2018
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2019
      }
2020
    for node in self.cfg.GetAllNodesInfo().values():
2021
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2022

    
2023
    return env, [], all_nodes
2024

    
2025
  def Exec(self, feedback_fn):
2026
    """Verify integrity of cluster, performing various test on nodes.
2027

2028
    """
2029
    self.bad = False
2030
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2031
    verbose = self.op.verbose
2032
    self._feedback_fn = feedback_fn
2033
    feedback_fn("* Verifying global settings")
2034
    for msg in self.cfg.VerifyConfig():
2035
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2036

    
2037
    # Check the cluster certificates
2038
    for cert_filename in constants.ALL_CERT_FILES:
2039
      (errcode, msg) = _VerifyCertificate(cert_filename)
2040
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2041

    
2042
    vg_name = self.cfg.GetVGName()
2043
    drbd_helper = self.cfg.GetDRBDHelper()
2044
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2045
    cluster = self.cfg.GetClusterInfo()
2046
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2047
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2048
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2049
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2050
                        for iname in instancelist)
2051
    i_non_redundant = [] # Non redundant instances
2052
    i_non_a_balanced = [] # Non auto-balanced instances
2053
    n_offline = 0 # Count of offline nodes
2054
    n_drained = 0 # Count of nodes being drained
2055
    node_vol_should = {}
2056

    
2057
    # FIXME: verify OS list
2058
    # do local checksums
2059
    master_files = [constants.CLUSTER_CONF_FILE]
2060
    master_node = self.master_node = self.cfg.GetMasterNode()
2061
    master_ip = self.cfg.GetMasterIP()
2062

    
2063
    file_names = ssconf.SimpleStore().GetFileList()
2064
    file_names.extend(constants.ALL_CERT_FILES)
2065
    file_names.extend(master_files)
2066
    if cluster.modify_etc_hosts:
2067
      file_names.append(constants.ETC_HOSTS)
2068

    
2069
    local_checksums = utils.FingerprintFiles(file_names)
2070

    
2071
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2072
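    # node_verify_param maps each NV_* check name to the argument(s) the
    # remote nodes need for that check; the same dict is sent to every node
    # via call_node_verify below.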
    node_verify_param = {
2073
      constants.NV_FILELIST: file_names,
2074
      constants.NV_NODELIST: [node.name for node in nodeinfo
2075
                              if not node.offline],
2076
      constants.NV_HYPERVISOR: hypervisors,
2077
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2078
                                  node.secondary_ip) for node in nodeinfo
2079
                                 if not node.offline],
2080
      constants.NV_INSTANCELIST: hypervisors,
2081
      constants.NV_VERSION: None,
2082
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2083
      constants.NV_NODESETUP: None,
2084
      constants.NV_TIME: None,
2085
      constants.NV_MASTERIP: (master_node, master_ip),
2086
      constants.NV_OSLIST: None,
2087
      }
2088

    
2089
    if vg_name is not None:
2090
      node_verify_param[constants.NV_VGLIST] = None
2091
      node_verify_param[constants.NV_LVLIST] = vg_name
2092
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2093
      node_verify_param[constants.NV_DRBDLIST] = None
2094

    
2095
    if drbd_helper:
2096
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2097

    
2098
    # Build our expected cluster state
2099
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2100
                                                 name=node.name))
2101
                      for node in nodeinfo)
2102

    
2103
    for instance in instancelist:
2104
      inst_config = instanceinfo[instance]
2105

    
2106
      for nname in inst_config.all_nodes:
2107
        if nname not in node_image:
2108
          # ghost node
2109
          gnode = self.NodeImage(name=nname)
2110
          gnode.ghost = True
2111
          node_image[nname] = gnode
2112

    
2113
      inst_config.MapLVsByNode(node_vol_should)
2114

    
2115
      pnode = inst_config.primary_node
2116
      node_image[pnode].pinst.append(instance)
2117

    
2118
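      # sbp holds, per primary node, the list of instances for which this
      # node acts as secondary; the N+1 memory check consumes it later on.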
      for snode in inst_config.secondary_nodes:
2119
        nimg = node_image[snode]
2120
        nimg.sinst.append(instance)
2121
        if pnode not in nimg.sbp:
2122
          nimg.sbp[pnode] = []
2123
        nimg.sbp[pnode].append(instance)
2124

    
2125
    # At this point, we have the in-memory data structures complete,
2126
    # except for the runtime information, which we'll gather next
2127

    
2128
    # Due to the way our RPC system works, exact response times cannot be
2129
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2130
    # time before and after executing the request, we can at least have a time
2131
    # window.
2132
    nvinfo_starttime = time.time()
2133
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2134
                                           self.cfg.GetClusterName())
2135
    nvinfo_endtime = time.time()
2136

    
2137
    all_drbd_map = self.cfg.ComputeDRBDMap()
2138

    
2139
    feedback_fn("* Verifying node status")
2140

    
2141
    refos_img = None
2142

    
2143
    for node_i in nodeinfo:
2144
      node = node_i.name
2145
      nimg = node_image[node]
2146

    
2147
      if node_i.offline:
2148
        if verbose:
2149
          feedback_fn("* Skipping offline node %s" % (node,))
2150
        n_offline += 1
2151
        continue
2152

    
2153
      if node == master_node:
2154
        ntype = "master"
2155
      elif node_i.master_candidate:
2156
        ntype = "master candidate"
2157
      elif node_i.drained:
2158
        ntype = "drained"
2159
        n_drained += 1
2160
      else:
2161
        ntype = "regular"
2162
      if verbose:
2163
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2164

    
2165
      msg = all_nvinfo[node].fail_msg
2166
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2167
      if msg:
2168
        nimg.rpc_fail = True
2169
        continue
2170

    
2171
      nresult = all_nvinfo[node].payload
2172

    
2173
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2174
      self._VerifyNodeNetwork(node_i, nresult)
2175
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2176
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2177
                            master_files)
2178
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2179
                           all_drbd_map)
2180
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2181

    
2182
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2183
      self._UpdateNodeInstances(node_i, nresult, nimg)
2184
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2185
      self._UpdateNodeOS(node_i, nresult, nimg)
2186
      if not nimg.os_fail:
2187
        if refos_img is None:
2188
          refos_img = nimg
2189
        self._VerifyNodeOS(node_i, nimg, refos_img)
2190

    
2191
    feedback_fn("* Verifying instance status")
2192
    for instance in instancelist:
2193
      if verbose:
2194
        feedback_fn("* Verifying instance %s" % instance)
2195
      inst_config = instanceinfo[instance]
2196
      self._VerifyInstance(instance, inst_config, node_image)
2197
      inst_nodes_offline = []
2198

    
2199
      pnode = inst_config.primary_node
2200
      pnode_img = node_image[pnode]
2201
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2202
               self.ENODERPC, pnode, "instance %s, connection to"
2203
               " primary node failed", instance)
2204

    
2205
      if pnode_img.offline:
2206
        inst_nodes_offline.append(pnode)
2207

    
2208
      # If the instance is non-redundant we cannot survive losing its primary
2209
      # node, so we are not N+1 compliant. On the other hand we have no disk
2210
      # templates with more than one secondary so that situation is not well
2211
      # supported either.
2212
      # FIXME: does not support file-backed instances
2213
      if not inst_config.secondary_nodes:
2214
        i_non_redundant.append(instance)
2215
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2216
               instance, "instance has multiple secondary nodes: %s",
2217
               utils.CommaJoin(inst_config.secondary_nodes),
2218
               code=self.ETYPE_WARNING)
2219

    
2220
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2221
        i_non_a_balanced.append(instance)
2222

    
2223
      for snode in inst_config.secondary_nodes:
2224
        s_img = node_image[snode]
2225
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2226
                 "instance %s, connection to secondary node failed", instance)
2227

    
2228
        if s_img.offline:
2229
          inst_nodes_offline.append(snode)
2230

    
2231
      # warn that the instance lives on offline nodes
2232
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2233
               "instance lives on offline node(s) %s",
2234
               utils.CommaJoin(inst_nodes_offline))
2235
      # ... or ghost nodes
2236
      for node in inst_config.all_nodes:
2237
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2238
                 "instance lives on ghost node %s", node)
2239

    
2240
    feedback_fn("* Verifying orphan volumes")
2241
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2242
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2243

    
2244
    feedback_fn("* Verifying orphan instances")
2245
    self._VerifyOrphanInstances(instancelist, node_image)
2246

    
2247
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2248
      feedback_fn("* Verifying N+1 Memory redundancy")
2249
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2250

    
2251
    feedback_fn("* Other Notes")
2252
    if i_non_redundant:
2253
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2254
                  % len(i_non_redundant))
2255

    
2256
    if i_non_a_balanced:
2257
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2258
                  % len(i_non_a_balanced))
2259

    
2260
    if n_offline:
2261
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2262

    
2263
    if n_drained:
2264
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2265

    
2266
    return not self.bad
2267

    
2268
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2269
    """Analyze the post-hooks' result
2270

2271
    This method analyses the hook result, handles it, and sends some
2272
    nicely-formatted feedback back to the user.
2273

2274
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2275
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2276
    @param hooks_results: the results of the multi-node hooks rpc call
2277
    @param feedback_fn: function used to send feedback back to the caller
2278
    @param lu_result: previous Exec result
2279
    @return: the new Exec result, based on the previous result
2280
        and hook results
2281

2282
    """
2283
    # We only really run POST phase hooks, and are only interested in
2284
    # their results
2285
    if phase == constants.HOOKS_PHASE_POST:
2286
      # Used to change hooks' output to proper indentation
2287
      indent_re = re.compile('^', re.M)
2288
      feedback_fn("* Hooks Results")
2289
      assert hooks_results, "invalid result from hooks"
2290

    
2291
      for node_name in hooks_results:
2292
        res = hooks_results[node_name]
2293
        msg = res.fail_msg
2294
        test = msg and not res.offline
2295
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2296
                      "Communication failure in hooks execution: %s", msg)
2297
        if res.offline or msg:
2298
          # No need to investigate payload if node is offline or gave an error.
2299
          # manually override lu_result here, as _ErrorIf only
2300
          # overrides self.bad
2301
          lu_result = 1
2302
          continue
2303
        for script, hkr, output in res.payload:
2304
          test = hkr == constants.HKR_FAIL
2305
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2306
                        "Script %s failed, output:", script)
2307
          if test:
2308
            output = indent_re.sub('      ', output)
2309
            feedback_fn("%s" % output)
2310
            lu_result = 0
2311

    
2312
      return lu_result
2313

    
2314

    
2315
class LUVerifyDisks(NoHooksLU):
2316
  """Verifies the cluster disks status.
2317

2318
  """
2319
  REQ_BGL = False
2320

    
2321
  def ExpandNames(self):
2322
    self.needed_locks = {
2323
      locking.LEVEL_NODE: locking.ALL_SET,
2324
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2325
    }
2326
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2327

    
2328
  def Exec(self, feedback_fn):
2329
    """Verify integrity of cluster disks.
2330

2331
    @rtype: tuple of three items
2332
    @return: a tuple of (dict of node-to-node_error, list of instances
2333
        which need activate-disks, dict of instance: (node, volume) for
2334
        missing volumes)
2335

2336
    """
2337
    result = res_nodes, res_instances, res_missing = {}, [], {}
2338

    
2339
    vg_name = self.cfg.GetVGName()
2340
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2341
    instances = [self.cfg.GetInstanceInfo(name)
2342
                 for name in self.cfg.GetInstanceList()]
2343

    
2344
    nv_dict = {}
2345
    for inst in instances:
2346
      inst_lvs = {}
2347
      if (not inst.admin_up or
2348
          inst.disk_template not in constants.DTS_NET_MIRROR):
2349
        continue
2350
      inst.MapLVsByNode(inst_lvs)
2351
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
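      # e.g. (hypothetical names) nv_dict[("node1.example.com", "xenvg/disk0")]
      # would map to the instance object owning that logical volume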
2352
      for node, vol_list in inst_lvs.iteritems():
2353
        for vol in vol_list:
2354
          nv_dict[(node, vol)] = inst
2355

    
2356
    if not nv_dict:
2357
      return result
2358

    
2359
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2360

    
2361
    for node in nodes:
2362
      # node_volume
2363
      node_res = node_lvs[node]
2364
      if node_res.offline:
2365
        continue
2366
      msg = node_res.fail_msg
2367
      if msg:
2368
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2369
        res_nodes[node] = msg
2370
        continue
2371

    
2372
      lvs = node_res.payload
2373
      for lv_name, (_, _, lv_online) in lvs.items():
2374
        inst = nv_dict.pop((node, lv_name), None)
2375
        if (not lv_online and inst is not None
2376
            and inst.name not in res_instances):
2377
          res_instances.append(inst.name)
2378

    
2379
    # any leftover items in nv_dict are missing LVs, let's arrange the
2380
    # data better
2381
    for key, inst in nv_dict.iteritems():
2382
      if inst.name not in res_missing:
2383
        res_missing[inst.name] = []
2384
      res_missing[inst.name].append(key)
2385

    
2386
    return result
2387

    
2388

    
2389
class LURepairDiskSizes(NoHooksLU):
2390
  """Verifies the cluster disks sizes.
2391

2392
  """
2393
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2394
  REQ_BGL = False
2395

    
2396
  def ExpandNames(self):
2397
    if self.op.instances:
2398
      self.wanted_names = []
2399
      for name in self.op.instances:
2400
        full_name = _ExpandInstanceName(self.cfg, name)
2401
        self.wanted_names.append(full_name)
2402
      self.needed_locks = {
2403
        locking.LEVEL_NODE: [],
2404
        locking.LEVEL_INSTANCE: self.wanted_names,
2405
        }
2406
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2407
    else:
2408
      self.wanted_names = None
2409
      self.needed_locks = {
2410
        locking.LEVEL_NODE: locking.ALL_SET,
2411
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2412
        }
2413
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2414

    
2415
  def DeclareLocks(self, level):
2416
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2417
      self._LockInstancesNodes(primary_only=True)
2418

    
2419
  def CheckPrereq(self):
2420
    """Check prerequisites.
2421

2422
    This only checks the optional instance list against the existing names.
2423

2424
    """
2425
    if self.wanted_names is None:
2426
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2427

    
2428
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2429
                             in self.wanted_names]
2430

    
2431
  def _EnsureChildSizes(self, disk):
2432
    """Ensure children of the disk have the needed disk size.
2433

2434
    This is valid mainly for DRBD8 and fixes an issue where the
2435
    children have a smaller disk size.
2436

2437
    @param disk: an L{ganeti.objects.Disk} object
2438

2439
    """
2440
    if disk.dev_type == constants.LD_DRBD8:
2441
      assert disk.children, "Empty children for DRBD8?"
2442
      fchild = disk.children[0]
2443
      mismatch = fchild.size < disk.size
2444
      if mismatch:
2445
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2446
                     fchild.size, disk.size)
2447
        fchild.size = disk.size
2448

    
2449
      # and we recurse on this child only, not on the metadev
2450
      return self._EnsureChildSizes(fchild) or mismatch
2451
    else:
2452
      return False
2453

    
2454
  def Exec(self, feedback_fn):
2455
    """Verify the size of cluster disks.
2456

2457
    """
2458
    # TODO: check child disks too
2459
    # TODO: check differences in size between primary/secondary nodes
2460
    per_node_disks = {}
2461
    for instance in self.wanted_instances:
2462
      pnode = instance.primary_node
2463
      if pnode not in per_node_disks:
2464
        per_node_disks[pnode] = []
2465
      for idx, disk in enumerate(instance.disks):
2466
        per_node_disks[pnode].append((instance, idx, disk))
2467

    
2468
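    # changed collects (instance name, disk index, corrected size) tuples
    # for every disk whose recorded size had to be fixed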
    changed = []
2469
    for node, dskl in per_node_disks.items():
2470
      newl = [v[2].Copy() for v in dskl]
2471
      for dsk in newl:
2472
        self.cfg.SetDiskID(dsk, node)
2473
      result = self.rpc.call_blockdev_getsizes(node, newl)
2474
      if result.fail_msg:
2475
        self.LogWarning("Failure in blockdev_getsizes call to node"
2476
                        " %s, ignoring", node)
2477
        continue
2478
      if len(result.data) != len(dskl):
2479
        self.LogWarning("Invalid result from node %s, ignoring node results",
2480
                        node)
2481
        continue
2482
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2483
        if size is None:
2484
          self.LogWarning("Disk %d of instance %s did not return size"
2485
                          " information, ignoring", idx, instance.name)
2486
          continue
2487
        if not isinstance(size, (int, long)):
2488
          self.LogWarning("Disk %d of instance %s did not return valid"
2489
                          " size information, ignoring", idx, instance.name)
2490
          continue
2491
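        # the sizes returned by blockdev_getsizes are in bytes; shifting
        # right by 20 bits converts them to MiB, the unit used by disk.size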
        size = size >> 20
2492
        if size != disk.size:
2493
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2494
                       " correcting: recorded %d, actual %d", idx,
2495
                       instance.name, disk.size, size)
2496
          disk.size = size
2497
          self.cfg.Update(instance, feedback_fn)
2498
          changed.append((instance.name, idx, size))
2499
        if self._EnsureChildSizes(disk):
2500
          self.cfg.Update(instance, feedback_fn)
2501
          changed.append((instance.name, idx, disk.size))
2502
    return changed
2503

    
2504

    
2505
class LURenameCluster(LogicalUnit):
2506
  """Rename the cluster.
2507

2508
  """
2509
  HPATH = "cluster-rename"
2510
  HTYPE = constants.HTYPE_CLUSTER
2511
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2512

    
2513
  def BuildHooksEnv(self):
2514
    """Build hooks env.
2515

2516
    """
2517
    env = {
2518
      "OP_TARGET": self.cfg.GetClusterName(),
2519
      "NEW_NAME": self.op.name,
2520
      }
2521
    mn = self.cfg.GetMasterNode()
2522
    all_nodes = self.cfg.GetNodeList()
2523
    return env, [mn], all_nodes
2524

    
2525
  def CheckPrereq(self):
2526
    """Verify that the passed name is a valid one.
2527

2528
    """
2529
    hostname = netutils.GetHostInfo(self.op.name)
2530

    
2531
    new_name = hostname.name
2532
    self.ip = new_ip = hostname.ip
2533
    old_name = self.cfg.GetClusterName()
2534
    old_ip = self.cfg.GetMasterIP()
2535
    if new_name == old_name and new_ip == old_ip:
2536
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2537
                                 " cluster has changed",
2538
                                 errors.ECODE_INVAL)
2539
    if new_ip != old_ip:
2540
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2541
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2542
                                   " reachable on the network. Aborting." %
2543
                                   new_ip, errors.ECODE_NOTUNIQUE)
2544

    
2545
    self.op.name = new_name
2546

    
2547
  def Exec(self, feedback_fn):
2548
    """Rename the cluster.
2549

2550
    """
2551
    clustername = self.op.name
2552
    ip = self.ip
2553

    
2554
    # shutdown the master IP
2555
    master = self.cfg.GetMasterNode()
2556
    result = self.rpc.call_node_stop_master(master, False)
2557
    result.Raise("Could not disable the master role")
2558

    
2559
    try:
2560
      cluster = self.cfg.GetClusterInfo()
2561
      cluster.cluster_name = clustername
2562
      cluster.master_ip = ip
2563
      self.cfg.Update(cluster, feedback_fn)
2564

    
2565
      # update the known hosts file
2566
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2567
      node_list = self.cfg.GetNodeList()
2568
      try:
2569
        node_list.remove(master)
2570
      except ValueError:
2571
        pass
2572
      result = self.rpc.call_upload_file(node_list,
2573
                                         constants.SSH_KNOWN_HOSTS_FILE)
2574
      for to_node, to_result in result.iteritems():
2575
        msg = to_result.fail_msg
2576
        if msg:
2577
          msg = ("Copy of file %s to node %s failed: %s" %
2578
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2579
          self.proc.LogWarning(msg)
2580

    
2581
    finally:
2582
      result = self.rpc.call_node_start_master(master, False, False)
2583
      msg = result.fail_msg
2584
      if msg:
2585
        self.LogWarning("Could not re-enable the master role on"
2586
                        " the master, please restart manually: %s", msg)
2587

    
2588
    return clustername
2589

    
2590

    
2591
class LUSetClusterParams(LogicalUnit):
2592
  """Change the parameters of the cluster.
2593

2594
  """
2595
  HPATH = "cluster-modify"
2596
  HTYPE = constants.HTYPE_CLUSTER
2597
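  # Each _OP_PARAMS entry is a (name, default value, type check) triple; the
  # checks are built from the small _T* combinators defined at the top of
  # this module.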
  _OP_PARAMS = [
2598
    ("vg_name", None, _TMaybeString),
2599
    ("enabled_hypervisors", None,
2600
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2601
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2602
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2603
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2604
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2605
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2606
    ("uid_pool", None, _NoType),
2607
    ("add_uids", None, _NoType),
2608
    ("remove_uids", None, _NoType),
2609
    ("maintain_node_health", None, _TMaybeBool),
2610
    ("nicparams", None, _TOr(_TDict, _TNone)),
2611
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2612
    ("default_iallocator", None, _TMaybeString),
2613
    ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2614
    ]
2615
  REQ_BGL = False
2616

    
2617
  def CheckArguments(self):
2618
    """Check parameters
2619

2620
    """
2621
    if self.op.uid_pool:
2622
      uidpool.CheckUidPool(self.op.uid_pool)
2623

    
2624
    if self.op.add_uids:
2625
      uidpool.CheckUidPool(self.op.add_uids)
2626

    
2627
    if self.op.remove_uids:
2628
      uidpool.CheckUidPool(self.op.remove_uids)
2629

    
2630
  def ExpandNames(self):
2631
    # FIXME: in the future, modifying other cluster parameters might not
2632
    # require checking all nodes.
2633
    self.needed_locks = {
2634
      locking.LEVEL_NODE: locking.ALL_SET,
2635
    }
2636
    self.share_locks[locking.LEVEL_NODE] = 1
2637

    
2638
  def BuildHooksEnv(self):
2639
    """Build hooks env.
2640

2641
    """
2642
    env = {
2643
      "OP_TARGET": self.cfg.GetClusterName(),
2644
      "NEW_VG_NAME": self.op.vg_name,
2645
      }
2646
    mn = self.cfg.GetMasterNode()
2647
    return env, [mn], [mn]
2648

    
2649
  def CheckPrereq(self):
2650
    """Check prerequisites.
2651

2652
    This checks whether the given params don't conflict and
2653
    if the given volume group is valid.
2654

2655
    """
2656
    if self.op.vg_name is not None and not self.op.vg_name:
2657
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2658
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2659
                                   " instances exist", errors.ECODE_INVAL)
2660

    
2661
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2662
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2663
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2664
                                   " drbd-based instances exist",
2665
                                   errors.ECODE_INVAL)
2666

    
2667
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2668

    
2669
    # if vg_name not None, checks given volume group on all nodes
2670
    if self.op.vg_name:
2671
      vglist = self.rpc.call_vg_list(node_list)
2672
      for node in node_list:
2673
        msg = vglist[node].fail_msg
2674
        if msg:
2675
          # ignoring down node
2676
          self.LogWarning("Error while gathering data on node %s"
2677
                          " (ignoring node): %s", node, msg)
2678
          continue
2679
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2680
                                              self.op.vg_name,
2681
                                              constants.MIN_VG_SIZE)
2682
        if vgstatus:
2683
          raise errors.OpPrereqError("Error on node '%s': %s" %
2684
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2685

    
2686
    if self.op.drbd_helper:
2687
      # checks given drbd helper on all nodes
2688
      helpers = self.rpc.call_drbd_helper(node_list)
2689
      for node in node_list:
2690
        ninfo = self.cfg.GetNodeInfo(node)
2691
        if ninfo.offline:
2692
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2693
          continue
2694
        msg = helpers[node].fail_msg
2695
        if msg:
2696
          raise errors.OpPrereqError("Error checking drbd helper on node"
2697
                                     " '%s': %s" % (node, msg),
2698
                                     errors.ECODE_ENVIRON)
2699
        node_helper = helpers[node].payload
2700
        if node_helper != self.op.drbd_helper:
2701
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2702
                                     (node, node_helper), errors.ECODE_ENVIRON)
2703

    
2704
    self.cluster = cluster = self.cfg.GetClusterInfo()
2705
    # validate params changes
2706
    if self.op.beparams:
2707
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2708
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2709

    
2710
    if self.op.nicparams:
2711
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2712
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2713
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2714
      nic_errors = []
2715

    
2716
      # check all instances for consistency
2717
      for instance in self.cfg.GetAllInstancesInfo().values():
2718
        for nic_idx, nic in enumerate(instance.nics):
2719
          params_copy = copy.deepcopy(nic.nicparams)
2720
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2721

    
2722
          # check parameter syntax
2723
          try:
2724
            objects.NIC.CheckParameterSyntax(params_filled)
2725
          except errors.ConfigurationError, err:
2726
            nic_errors.append("Instance %s, nic/%d: %s" %
2727
                              (instance.name, nic_idx, err))
2728

    
2729
          # if we're moving instances to routed, check that they have an ip
2730
          target_mode = params_filled[constants.NIC_MODE]
2731
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2732
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2733
                              (instance.name, nic_idx))
2734
      if nic_errors:
2735
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2736
                                   "\n".join(nic_errors))
2737

    
2738
    # hypervisor list/parameters
2739
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2740
    if self.op.hvparams:
2741
      for hv_name, hv_dict in self.op.hvparams.items():
2742
        if hv_name not in self.new_hvparams:
2743
          self.new_hvparams[hv_name] = hv_dict
2744
        else:
2745
          self.new_hvparams[hv_name].update(hv_dict)
2746

    
2747
    # os hypervisor parameters
2748
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2749
    if self.op.os_hvp:
2750
      for os_name, hvs in self.op.os_hvp.items():
2751
        if os_name not in self.new_os_hvp:
2752
          self.new_os_hvp[os_name] = hvs
2753
        else:
2754
          for hv_name, hv_dict in hvs.items():
2755
            if hv_name not in self.new_os_hvp[os_name]:
2756
              self.new_os_hvp[os_name][hv_name] = hv_dict
2757
            else:
2758
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2759

    
2760
    # os parameters
2761
    self.new_osp = objects.FillDict(cluster.osparams, {})
2762
    if self.op.osparams:
2763
      for os_name, osp in self.op.osparams.items():
2764
        if os_name not in self.new_osp:
2765
          self.new_osp[os_name] = {}
2766

    
2767
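        # use_none=True lets the caller drop individual OS parameters by
        # passing None as their value; an OS left without any parameters is
        # removed entirely just below.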
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2768
                                                  use_none=True)
2769

    
2770
        if not self.new_osp[os_name]:
2771
          # we removed all parameters
2772
          del self.new_osp[os_name]
2773
        else:
2774
          # check the parameter validity (remote check)
2775
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2776
                         os_name, self.new_osp[os_name])
2777

    
2778
    # changes to the hypervisor list
2779
    if self.op.enabled_hypervisors is not None:
2780
      self.hv_list = self.op.enabled_hypervisors
2781
      for hv in self.hv_list:
2782
        # if the hypervisor doesn't already exist in the cluster
2783
        # hvparams, we initialize it to empty, and then (in both
2784
        # cases) we make sure to fill the defaults, as we might not
2785
        # have a complete defaults list if the hypervisor wasn't
2786
        # enabled before
2787
        if hv not in new_hvp:
2788
          new_hvp[hv] = {}
2789
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2790
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2791
    else:
2792
      self.hv_list = cluster.enabled_hypervisors
2793

    
2794
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2795
      # either the enabled list has changed, or the parameters have, validate
2796
      for hv_name, hv_params in self.new_hvparams.items():
2797
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2798
            (self.op.enabled_hypervisors and
2799
             hv_name in self.op.enabled_hypervisors)):
2800
          # either this is a new hypervisor, or its parameters have changed
2801
          hv_class = hypervisor.GetHypervisor(hv_name)
2802
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2803
          hv_class.CheckParameterSyntax(hv_params)
2804
          _CheckHVParams(self, node_list, hv_name, hv_params)
2805

    
2806
    if self.op.os_hvp:
2807
      # no need to check any newly-enabled hypervisors, since the
2808
      # defaults have already been checked in the above code-block
2809
      for os_name, os_hvp in self.new_os_hvp.items():
2810
        for hv_name, hv_params in os_hvp.items():
2811
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2812
          # we need to fill in the new os_hvp on top of the actual hv_p
2813
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2814
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2815
          hv_class = hypervisor.GetHypervisor(hv_name)
2816
          hv_class.CheckParameterSyntax(new_osp)
2817
          _CheckHVParams(self, node_list, hv_name, new_osp)
2818

    
2819
    if self.op.default_iallocator:
2820
      alloc_script = utils.FindFile(self.op.default_iallocator,
2821
                                    constants.IALLOCATOR_SEARCH_PATH,
2822
                                    os.path.isfile)
2823
      if alloc_script is None:
2824
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2825
                                   " specified" % self.op.default_iallocator,
2826
                                   errors.ECODE_INVAL)
2827

    
2828
  def Exec(self, feedback_fn):
2829
    """Change the parameters of the cluster.
2830

2831
    """
2832
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
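    # writing the cluster object back, even unmodified, makes ConfigWriter
    # redistribute the configuration and ssconf files to all nodes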
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
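  # poll the mirror status via RPC once per iteration; up to 10 consecutive
  # RPC failures are tolerated before aborting, and we sleep at most 60
  # seconds between successful polls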
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

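  # also check any child devices (e.g. the logical volumes backing a DRBD
  # disk); note that children are always checked with ldisk=False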
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary
3101

3102
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

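      # hidden and blacklisted OSes are only listed when the corresponding
      # output fields were explicitly requested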
      is_hid = os_name in cluster.hidden_oss
      is_blk = os_name in cluster.blacklisted_oss
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
3298
  """Logical unit for querying nodes.
3299

3300
  """
3301
  # pylint: disable-msg=W0142
3302
  _OP_PARAMS = [
3303
    _POutputFields,
3304
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3305
    ("use_locking", False, _TBool),
3306
    ]
3307
  REQ_BGL = False
3308

    
3309
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3310
                    "master_candidate", "offline", "drained"]
3311

    
3312
  _FIELDS_DYNAMIC = utils.FieldSet(
3313
    "dtotal", "dfree",
3314
    "mtotal", "mnode", "mfree",
3315
    "bootid",
3316
    "ctotal", "cnodes", "csockets",
3317
    )
3318

    
3319
  _FIELDS_STATIC = utils.FieldSet(*[
3320
    "pinst_cnt", "sinst_cnt",
3321
    "pinst_list", "sinst_list",
3322
    "pip", "sip", "tags",
3323
    "master",
3324
    "role"] + _SIMPLE_FIELDS
3325
    )
3326

    
3327
  def CheckArguments(self):
3328
    _CheckOutputFields(static=self._FIELDS_STATIC,
3329
                       dynamic=self._FIELDS_DYNAMIC,
3330
                       selected=self.op.output_fields)
3331

    
3332
  def ExpandNames(self):
3333
    self.needed_locks = {}
3334
    self.share_locks[locking.LEVEL_NODE] = 1
3335

    
3336
    if self.op.names:
3337
      self.wanted = _GetWantedNodes(self, self.op.names)
3338
    else:
3339
      self.wanted = locking.ALL_SET
3340

    
3341
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3342
    self.do_locking = self.do_node_query and self.op.use_locking
3343
    if self.do_locking:
3344
      # if we don't request only static fields, we need to lock the nodes
3345
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3346

    
3347
  def Exec(self, feedback_fn):
3348
    """Computes the list of nodes and their attributes.
3349

3350
    """
3351
    all_info = self.cfg.GetAllNodesInfo()
3352
    if self.do_locking:
3353
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3354
    elif self.wanted != locking.ALL_SET:
3355
      nodenames = self.wanted
3356
      missing = set(nodenames).difference(all_info.keys())
3357
      if missing:
3358
        raise errors.OpExecError(
3359
          "Some nodes were removed before retrieving their data: %s" % missing)
3360
    else:
3361
      nodenames = all_info.keys()
3362

    
3363
    nodenames = utils.NiceSort(nodenames)
3364
    nodelist = [all_info[name] for name in nodenames]
3365

    
3366
    # begin data gathering
3367

    
3368
    if self.do_node_query:
3369
      live_data = {}
3370
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3371
                                          self.cfg.GetHypervisorType())
3372
      for name in nodenames:
3373
        nodeinfo = node_data[name]
3374
        if not nodeinfo.fail_msg and nodeinfo.payload:
3375
          nodeinfo = nodeinfo.payload
3376
          fn = utils.TryConvert
3377
          live_data[name] = {
3378
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3379
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3380
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3381
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3382
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3383
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3384
            "bootid": nodeinfo.get('bootid', None),
3385
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3386
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3387
            }
3388
        else:
3389
          live_data[name] = {}
3390
    else:
3391
      live_data = dict.fromkeys(nodenames, {})
3392

    
3393
    node_to_primary = dict([(name, set()) for name in nodenames])
3394
    node_to_secondary = dict([(name, set()) for name in nodenames])
3395

    
3396
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3397
                             "sinst_cnt", "sinst_list"))
3398
    if inst_fields & frozenset(self.op.output_fields):
3399
      inst_data = self.cfg.GetAllInstancesInfo()
3400

    
3401
      for inst in inst_data.values():
3402
        if inst.primary_node in node_to_primary:
3403
          node_to_primary[inst.primary_node].add(inst.name)
3404
        for secnode in inst.secondary_nodes:
3405
          if secnode in node_to_secondary:
3406
            node_to_secondary[secnode].add(inst.name)
3407

    
3408
    master_node = self.cfg.GetMasterNode()
3409

    
3410
    # end data gathering
3411

    
3412
    output = []
3413
    for node in nodelist:
3414
      node_output = []
3415
      for field in self.op.output_fields:
3416
        if field in self._SIMPLE_FIELDS:
3417
          val = getattr(node, field)
3418
        elif field == "pinst_list":
3419
          val = list(node_to_primary[node.name])
3420
        elif field == "sinst_list":
3421
          val = list(node_to_secondary[node.name])
3422
        elif field == "pinst_cnt":
3423
          val = len(node_to_primary[node.name])
3424
        elif field == "sinst_cnt":
3425
          val = len(node_to_secondary[node.name])
3426
        elif field == "pip":
3427
          val = node.primary_ip
3428
        elif field == "sip":
3429
          val = node.secondary_ip
3430
        elif field == "tags":
3431
          val = list(node.GetTags())
3432
        elif field == "master":
3433
          val = node.name == master_node
3434
        elif self._FIELDS_DYNAMIC.Matches(field):
3435
          val = live_data[node.name].get(field, None)
3436
        elif field == "role":
3437
          if node.name == master_node:
3438
            val = "M"
3439
          elif node.master_candidate:
3440
            val = "C"
3441
          elif node.drained:
3442
            val = "D"
3443
          elif node.offline:
3444
            val = "O"
3445
          else:
3446
            val = "R"
3447
        else:
3448
          raise errors.ParameterError(field)
3449
        node_output.append(val)
3450
      output.append(node_output)
3451

    
3452
    return output
3453

    
3454

    
3455
class LUQueryNodeVolumes(NoHooksLU):
3456
  """Logical unit for getting volumes on node(s).
3457

3458
  """
3459
  _OP_PARAMS = [
3460
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3461
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3462
    ]
3463
  REQ_BGL = False
3464
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3465
  _FIELDS_STATIC = utils.FieldSet("node")
3466

    
3467
  def CheckArguments(self):
3468
    _CheckOutputFields(static=self._FIELDS_STATIC,
3469
                       dynamic=self._FIELDS_DYNAMIC,
3470
                       selected=self.op.output_fields)
3471

    
3472
  def ExpandNames(self):
3473
    self.needed_locks = {}
3474
    self.share_locks[locking.LEVEL_NODE] = 1
3475
    if not self.op.nodes:
3476
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3477
    else:
3478
      self.needed_locks[locking.LEVEL_NODE] = \
3479
        _GetWantedNodes(self, self.op.nodes)
3480

    
3481
  def Exec(self, feedback_fn):
3482
    """Computes the list of nodes and their attributes.
3483

3484
    """
3485
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3486
    volumes = self.rpc.call_node_volumes(nodenames)
3487

    
3488
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3489
             in self.cfg.GetInstanceList()]
3490

    
3491
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3492

    
3493
    output = []
3494
    for node in nodenames:
3495
      nresult = volumes[node]
3496
      if nresult.offline:
3497
        continue
3498
      msg = nresult.fail_msg
3499
      if msg:
3500
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3501
        continue
3502

    
3503
      node_vols = nresult.payload[:]
3504
      node_vols.sort(key=lambda vol: vol['dev'])
3505

    
3506
      for vol in node_vols:
3507
        node_output = []
3508
        for field in self.op.output_fields:
3509
          if field == "node":
3510
            val = node
3511
          elif field == "phys":
3512
            val = vol['dev']
3513
          elif field == "vg":
3514
            val = vol['vg']
3515
          elif field == "name":
3516
            val = vol['name']
3517
          elif field == "size":
3518
            val = int(float(vol['size']))
3519
          elif field == "instance":
3520
            for inst in ilist:
3521
              if node not in lv_by_node[inst]:
3522
                continue
3523
              if vol['name'] in lv_by_node[inst][node]:
3524
                val = inst.name
3525
                break
3526
            else:
3527
              val = '-'
3528
          else:
3529
            raise errors.ParameterError(field)
3530
          node_output.append(str(val))
3531

    
3532
        output.append(node_output)
3533

    
3534
    return output
3535

    
3536

    
3537
class LUQueryNodeStorage(NoHooksLU):
3538
  """Logical unit for getting information on storage units on node(s).
3539

3540
  """
3541
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3542
  _OP_PARAMS = [
3543
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3544
    ("storage_type", _NoDefault, _CheckStorageType),
3545
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3546
    ("name", None, _TMaybeString),
3547
    ]
3548
  REQ_BGL = False
3549

    
3550
  def CheckArguments(self):
3551
    _CheckOutputFields(static=self._FIELDS_STATIC,
3552
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3553
                       selected=self.op.output_fields)
3554

    
3555
  def ExpandNames(self):
3556
    self.needed_locks = {}
3557
    self.share_locks[locking.LEVEL_NODE] = 1
3558

    
3559
    if self.op.nodes:
3560
      self.needed_locks[locking.LEVEL_NODE] = \
3561
        _GetWantedNodes(self, self.op.nodes)
3562
    else:
3563
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3564

    
3565
  def Exec(self, feedback_fn):
3566
    """Computes the list of nodes and their attributes.
3567

3568
    """
3569
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3570

    
3571
    # Always get name to sort by
3572
    if constants.SF_NAME in self.op.output_fields:
3573
      fields = self.op.output_fields[:]
3574
    else:
3575
      fields = [constants.SF_NAME] + self.op.output_fields
3576

    
3577
    # Never ask for node or type as it's only known to the LU
3578
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3579
      while extra in fields:
3580
        fields.remove(extra)
3581

    
3582
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3583
    name_idx = field_idx[constants.SF_NAME]
3584

    
3585
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3586
    data = self.rpc.call_storage_list(self.nodes,
3587
                                      self.op.storage_type, st_args,
3588
                                      self.op.name, fields)
3589

    
3590
    result = []
3591

    
3592
    for node in utils.NiceSort(self.nodes):
3593
      nresult = data[node]
3594
      if nresult.offline:
3595
        continue
3596

    
3597
      msg = nresult.fail_msg
3598
      if msg:
3599
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3600
        continue
3601

    
3602
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3603

    
3604
      for name in utils.NiceSort(rows.keys()):
3605
        row = rows[name]
3606

    
3607
        out = []
3608

    
3609
        for field in self.op.output_fields:
3610
          if field == constants.SF_NODE:
3611
            val = node
3612
          elif field == constants.SF_TYPE:
3613
            val = self.op.storage_type
3614
          elif field in field_idx:
3615
            val = row[field_idx[field]]
3616
          else:
3617
            raise errors.ParameterError(field)
3618

    
3619
          out.append(val)
3620

    
3621
        result.append(out)
3622

    
3623
    return result
3624

    
3625

    
3626
class LUModifyNodeStorage(NoHooksLU):
3627
  """Logical unit for modifying a storage volume on a node.
3628

3629
  """
3630
  _OP_PARAMS = [
3631
    _PNodeName,
3632
    ("storage_type", _NoDefault, _CheckStorageType),
3633
    ("name", _NoDefault, _TNonEmptyString),
3634
    ("changes", _NoDefault, _TDict),
3635
    ]
3636
  REQ_BGL = False
3637

    
3638
  def CheckArguments(self):
3639
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3640

    
3641
    storage_type = self.op.storage_type
3642

    
3643
    try:
3644
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3645
    except KeyError:
3646
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3647
                                 " modified" % storage_type,
3648
                                 errors.ECODE_INVAL)
3649

    
3650
    diff = set(self.op.changes.keys()) - modifiable
3651
    if diff:
3652
      raise errors.OpPrereqError("The following fields can not be modified for"
3653
                                 " storage units of type '%s': %r" %
3654
                                 (storage_type, list(diff)),
3655
                                 errors.ECODE_INVAL)
3656

    
3657
  def ExpandNames(self):
3658
    self.needed_locks = {
3659
      locking.LEVEL_NODE: self.op.node_name,
3660
      }
3661

    
3662
  def Exec(self, feedback_fn):
3663
    """Computes the list of nodes and their attributes.
3664

3665
    """
3666
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3667
    result = self.rpc.call_storage_modify(self.op.node_name,
3668
                                          self.op.storage_type, st_args,
3669
                                          self.op.name, self.op.changes)
3670
    result.Raise("Failed to modify storage unit '%s' on %s" %
3671
                 (self.op.name, self.op.node_name))
3672

    
3673

    
3674
class LUAddNode(LogicalUnit):
3675
  """Logical unit for adding node to the cluster.
3676

3677
  """
3678
  HPATH = "node-add"
3679
  HTYPE = constants.HTYPE_NODE
3680
  _OP_PARAMS = [
3681
    _PNodeName,
3682
    ("primary_ip", None, _NoType),
3683
    ("secondary_ip", None, _TMaybeString),
3684
    ("readd", False, _TBool),
3685
    ]
3686

    
3687
  def CheckArguments(self):
3688
    # validate/normalize the node name
3689
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3690

    
3691
  def BuildHooksEnv(self):
3692
    """Build hooks env.
3693

3694
    This will run on all nodes before, and on all nodes + the new node after.
3695

3696
    """
3697
    env = {
3698
      "OP_TARGET": self.op.node_name,
3699
      "NODE_NAME": self.op.node_name,
3700
      "NODE_PIP": self.op.primary_ip,
3701
      "NODE_SIP": self.op.secondary_ip,
3702
      }
3703
    nodes_0 = self.cfg.GetNodeList()
3704
    nodes_1 = nodes_0 + [self.op.node_name, ]
3705
    return env, nodes_0, nodes_1
3706

    
3707
  def CheckPrereq(self):
3708
    """Check prerequisites.
3709

3710
    This checks:
3711
     - the new node is not already in the config
3712
     - it is resolvable
3713
     - its parameters (single/dual homed) matches the cluster
3714

3715
    Any errors are signaled by raising errors.OpPrereqError.
3716

3717
    """
3718
    node_name = self.op.node_name
3719
    cfg = self.cfg
3720

    
3721
    dns_data = netutils.GetHostInfo(node_name)
3722

    
3723
    node = dns_data.name
3724
    primary_ip = self.op.primary_ip = dns_data.ip
3725
    if self.op.secondary_ip is None:
3726
      self.op.secondary_ip = primary_ip
3727
    if not netutils.IsValidIP4(self.op.secondary_ip):
3728
      raise errors.OpPrereqError("Invalid secondary IP given",
3729
                                 errors.ECODE_INVAL)
3730
    secondary_ip = self.op.secondary_ip
3731

    
3732
    node_list = cfg.GetNodeList()
3733
    if not self.op.readd and node in node_list:
3734
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3735
                                 node, errors.ECODE_EXISTS)
3736
    elif self.op.readd and node not in node_list:
3737
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3738
                                 errors.ECODE_NOENT)
3739

    
3740
    self.changed_primary_ip = False
3741

    
3742
    for existing_node_name in node_list:
3743
      existing_node = cfg.GetNodeInfo(existing_node_name)
3744

    
3745
      if self.op.readd and node == existing_node_name:
3746
        if existing_node.secondary_ip != secondary_ip:
3747
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3748
                                     " address configuration as before",
3749
                                     errors.ECODE_INVAL)
3750
        if existing_node.primary_ip != primary_ip:
3751
          self.changed_primary_ip = True
3752

    
3753
        continue
3754

    
3755
      if (existing_node.primary_ip == primary_ip or
3756
          existing_node.secondary_ip == primary_ip or
3757
          existing_node.primary_ip == secondary_ip or
3758
          existing_node.secondary_ip == secondary_ip):
3759
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3760
                                   " existing node %s" % existing_node.name,
3761
                                   errors.ECODE_NOTUNIQUE)
3762

    
3763
    # check that the type of the node (single versus dual homed) is the
3764
    # same as for the master
3765
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3766
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3767
    newbie_singlehomed = secondary_ip == primary_ip
3768
    if master_singlehomed != newbie_singlehomed:
3769
      if master_singlehomed:
3770
        raise errors.OpPrereqError("The master has no private ip but the"
3771
                                   " new node has one",
3772
                                   errors.ECODE_INVAL)
3773
      else:
3774
        raise errors.OpPrereqError("The master has a private ip but the"
3775
                                   " new node doesn't have one",
3776
                                   errors.ECODE_INVAL)
3777

    
3778
    # checks reachability
3779
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3780
      raise errors.OpPrereqError("Node not reachable by ping",
3781
                                 errors.ECODE_ENVIRON)
3782

    
3783
    if not newbie_singlehomed:
3784
      # check reachability from my secondary ip to newbie's secondary ip
3785
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3786
                           source=myself.secondary_ip):
3787
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3788
                                   " based ping to noded port",
3789
                                   errors.ECODE_ENVIRON)
3790

    
3791
    if self.op.readd:
3792
      exceptions = [node]
3793
    else:
3794
      exceptions = []
3795

    
3796
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3797

    
3798
    if self.op.readd:
3799
      self.new_node = self.cfg.GetNodeInfo(node)
3800
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3801
    else:
3802
      self.new_node = objects.Node(name=node,
3803
                                   primary_ip=primary_ip,
3804
                                   secondary_ip=secondary_ip,
3805
                                   master_candidate=self.master_candidate,
3806
                                   offline=False, drained=False)
3807

    
3808
  def Exec(self, feedback_fn):
3809
    """Adds the new node to the cluster.
3810

3811
    """
3812
    new_node = self.new_node
3813
    node = new_node.name
3814

    
3815
    # for re-adds, reset the offline/drained/master-candidate flags;
3816
    # we need to reset here, otherwise offline would prevent RPC calls
3817
    # later in the procedure; this also means that if the re-add
3818
    # fails, we are left with a non-offlined, broken node
3819
    if self.op.readd:
3820
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3821
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3822
      # if we demote the node, we do cleanup later in the procedure
3823
      new_node.master_candidate = self.master_candidate
3824
      if self.changed_primary_ip:
3825
        new_node.primary_ip = self.op.primary_ip
3826

    
3827
    # notify the user about any possible mc promotion
3828
    if new_node.master_candidate:
3829
      self.LogInfo("Node will be a master candidate")
3830

    
3831
    # check connectivity
3832
    result = self.rpc.call_version([node])[node]
3833
    result.Raise("Can't get version information from node %s" % node)
3834
    if constants.PROTOCOL_VERSION == result.payload:
3835
      logging.info("Communication to node %s fine, sw version %s match",
3836
                   node, result.payload)
3837
    else:
3838
      raise errors.OpExecError("Version mismatch master version %s,"
3839
                               " node version %s" %
3840
                               (constants.PROTOCOL_VERSION, result.payload))
3841

    
3842
    # setup ssh on node
3843
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3844
      logging.info("Copy ssh key to node %s", node)
3845
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3846
      keyarray = []
3847
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3848
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3849
                  priv_key, pub_key]
3850

    
3851
      for i in keyfiles:
3852
        keyarray.append(utils.ReadFile(i))
3853

    
3854
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3855
                                      keyarray[2], keyarray[3], keyarray[4],
3856
                                      keyarray[5])
3857
      result.Raise("Cannot transfer ssh keys to the new node")
3858

    
3859
    # Add node to our /etc/hosts, and add key to known_hosts
3860
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3861
      # FIXME: this should be done via an rpc call to node daemon
3862
      utils.AddHostToEtcHosts(new_node.name)
3863

    
3864
    if new_node.secondary_ip != new_node.primary_ip:
3865
      result = self.rpc.call_node_has_ip_address(new_node.name,
3866
                                                 new_node.secondary_ip)
3867
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3868
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3869
      if not result.payload:
3870
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3871
                                 " you gave (%s). Please fix and re-run this"
3872
                                 " command." % new_node.secondary_ip)
3873

    
3874
    node_verify_list = [self.cfg.GetMasterNode()]
3875
    node_verify_param = {
3876
      constants.NV_NODELIST: [node],
3877
      # TODO: do a node-net-test as well?
3878
    }
3879

    
3880
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3881
                                       self.cfg.GetClusterName())
3882
    for verifier in node_verify_list:
3883
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3884
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3885
      if nl_payload:
3886
        for failed in nl_payload:
3887
          feedback_fn("ssh/hostname verification failed"
3888
                      " (checking from %s): %s" %
3889
                      (verifier, nl_payload[failed]))
3890
        raise errors.OpExecError("ssh/hostname verification failed.")
3891

    
3892
    if self.op.readd:
3893
      _RedistributeAncillaryFiles(self)
3894
      self.context.ReaddNode(new_node)
3895
      # make sure we redistribute the config
3896
      self.cfg.Update(new_node, feedback_fn)
3897
      # and make sure the new node will not have old files around
3898
      if not new_node.master_candidate:
3899
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3900
        msg = result.fail_msg
3901
        if msg:
3902
          self.LogWarning("Node failed to demote itself from master"
3903
                          " candidate status: %s" % msg)
3904
    else:
3905
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3906
      self.context.AddNode(new_node, self.proc.GetECId())
3907

    
3908

    
3909
class LUSetNodeParams(LogicalUnit):
3910
  """Modifies the parameters of a node.
3911

3912
  """
3913
  HPATH = "node-modify"
3914
  HTYPE = constants.HTYPE_NODE
3915
  _OP_PARAMS = [
3916
    _PNodeName,
3917
    ("master_candidate", None, _TMaybeBool),
3918
    ("offline", None, _TMaybeBool),
3919
    ("drained", None, _TMaybeBool),
3920
    ("auto_promote", False, _TBool),
3921
    _PForce,
3922
    ]
3923
  REQ_BGL = False
3924

    
3925
  def CheckArguments(self):
3926
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3927
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3928
    if all_mods.count(None) == 3:
3929
      raise errors.OpPrereqError("Please pass at least one modification",
3930
                                 errors.ECODE_INVAL)
3931
    if all_mods.count(True) > 1:
3932
      raise errors.OpPrereqError("Can't set the node into more than one"
3933
                                 " state at the same time",
3934
                                 errors.ECODE_INVAL)
3935

    
3936
    # Boolean value that tells us whether we're offlining or draining the node
3937
    self.offline_or_drain = (self.op.offline == True or
3938
                             self.op.drained == True)
3939
    self.deoffline_or_drain = (self.op.offline == False or
3940
                               self.op.drained == False)
3941
    self.might_demote = (self.op.master_candidate == False or
3942
                         self.offline_or_drain)
3943

    
3944
    self.lock_all = self.op.auto_promote and self.might_demote
3945

    
3946

    
3947
  def ExpandNames(self):
3948
    if self.lock_all:
3949
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3950
    else:
3951
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3952

    
3953
  def BuildHooksEnv(self):
3954
    """Build hooks env.
3955

3956
    This runs on the master node.
3957

3958
    """
3959
    env = {
3960
      "OP_TARGET": self.op.node_name,
3961
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3962
      "OFFLINE": str(self.op.offline),
3963
      "DRAINED": str(self.op.drained),
3964
      }
3965
    nl = [self.cfg.GetMasterNode(),
3966
          self.op.node_name]
3967
    return env, nl, nl
3968

    
3969
  def CheckPrereq(self):
3970
    """Check prerequisites.
3971

3972
    This only checks the instance list against the existing names.
3973

3974
    """
3975
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3976

    
3977
    if (self.op.master_candidate is not None or
3978
        self.op.drained is not None or
3979
        self.op.offline is not None):
3980
      # we can't change the master's node flags
3981
      if self.op.node_name == self.cfg.GetMasterNode():
3982
        raise errors.OpPrereqError("The master role can be changed"
3983
                                   " only via master-failover",
3984
                                   errors.ECODE_INVAL)
3985

    
3986

    
3987
    if node.master_candidate and self.might_demote and not self.lock_all:
3988
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3989
      # check if after removing the current node, we're missing master
3990
      # candidates
3991
      (mc_remaining, mc_should, _) = \
3992
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3993
      if mc_remaining < mc_should:
3994
        raise errors.OpPrereqError("Not enough master candidates, please"
3995
                                   " pass auto_promote to allow promotion",
3996
                                   errors.ECODE_INVAL)
3997

    
3998
    if (self.op.master_candidate == True and
3999
        ((node.offline and not self.op.offline == False) or
4000
         (node.drained and not self.op.drained == False))):
4001
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4002
                                 " to master_candidate" % node.name,
4003
                                 errors.ECODE_INVAL)
4004

    
4005
    # If we're being deofflined/drained, we'll MC ourself if needed
4006
    if (self.deoffline_or_drain and not self.offline_or_drain and not
4007
        self.op.master_candidate == True and not node.master_candidate):
4008
      self.op.master_candidate = _DecideSelfPromotion(self)
4009
      if self.op.master_candidate:
4010
        self.LogInfo("Autopromoting node to master candidate")
4011

    
4012
    return
4013

    
4014
  def Exec(self, feedback_fn):
4015
    """Modifies a node.
4016

4017
    """
4018
    node = self.node
4019

    
4020
    result = []
4021
    changed_mc = False
4022

    
4023
    if self.op.offline is not None:
4024
      node.offline = self.op.offline
4025
      result.append(("offline", str(self.op.offline)))
4026
      if self.op.offline == True:
4027
        if node.master_candidate:
4028
          node.master_candidate = False
4029
          changed_mc = True
4030
          result.append(("master_candidate", "auto-demotion due to offline"))
4031
        if node.drained:
4032
          node.drained = False
4033
          result.append(("drained", "clear drained status due to offline"))
4034

    
4035
    if self.op.master_candidate is not None:
4036
      node.master_candidate = self.op.master_candidate
4037
      changed_mc = True
4038
      result.append(("master_candidate", str(self.op.master_candidate)))
4039
      if self.op.master_candidate == False:
4040
        rrc = self.rpc.call_node_demote_from_mc(node.name)
4041
        msg = rrc.fail_msg
4042
        if msg:
4043
          self.LogWarning("Node failed to demote itself: %s" % msg)
4044

    
4045
    if self.op.drained is not None:
4046
      node.drained = self.op.drained
4047
      result.append(("drained", str(self.op.drained)))
4048
      if self.op.drained == True:
4049
        if node.master_candidate:
4050
          node.master_candidate = False
4051
          changed_mc = True
4052
          result.append(("master_candidate", "auto-demotion due to drain"))
4053
          rrc = self.rpc.call_node_demote_from_mc(node.name)
4054
          msg = rrc.fail_msg
4055
          if msg:
4056
            self.LogWarning("Node failed to demote itself: %s" % msg)
4057
        if node.offline:
4058
          node.offline = False
4059
          result.append(("offline", "clear offline status due to drain"))
4060

    
4061
    # we locked all nodes, we adjust the CP before updating this node
4062
    if self.lock_all:
4063
      _AdjustCandidatePool(self, [node.name])
4064

    
4065
    # this will trigger configuration file update, if needed
4066
    self.cfg.Update(node, feedback_fn)
4067

    
4068
    # this will trigger job queue propagation or cleanup
4069
    if changed_mc:
4070
      self.context.ReaddNode(node)
4071

    
4072
    return result
4073

    
4074

    
4075
class LUPowercycleNode(NoHooksLU):
4076
  """Powercycles a node.
4077

4078
  """
4079
  _OP_PARAMS = [
4080
    _PNodeName,
4081
    _PForce,
4082
    ]
4083
  REQ_BGL = False
4084

    
4085
  def CheckArguments(self):
4086
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4087
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4088
      raise errors.OpPrereqError("The node is the master and the force"
4089
                                 " parameter was not set",
4090
                                 errors.ECODE_INVAL)
4091

    
4092
  def ExpandNames(self):
4093
    """Locking for PowercycleNode.
4094

4095
    This is a last-resort option and shouldn't block on other
4096
    jobs. Therefore, we grab no locks.
4097

4098
    """
4099
    self.needed_locks = {}
4100

    
4101
  def Exec(self, feedback_fn):
4102
    """Reboots a node.
4103

4104
    """
4105
    result = self.rpc.call_node_powercycle(self.op.node_name,
4106
                                           self.cfg.GetHypervisorType())
4107
    result.Raise("Failed to schedule the reboot")
4108
    return result.payload
4109

    
4110

    
4111
class LUQueryClusterInfo(NoHooksLU):
4112
  """Query cluster configuration.
4113

4114
  """
4115
  REQ_BGL = False
4116

    
4117
  def ExpandNames(self):
4118
    self.needed_locks = {}
4119

    
4120
  def Exec(self, feedback_fn):
4121
    """Return cluster config.
4122

4123
    """
4124
    cluster = self.cfg.GetClusterInfo()
4125
    os_hvp = {}
4126

    
4127
    # Filter just for enabled hypervisors
4128
    for os_name, hv_dict in cluster.os_hvp.items():
4129
      os_hvp[os_name] = {}
4130
      for hv_name, hv_params in hv_dict.items():
4131
        if hv_name in cluster.enabled_hypervisors:
4132
          os_hvp[os_name][hv_name] = hv_params
4133

    
4134
    result = {
4135
      "software_version": constants.RELEASE_VERSION,
4136
      "protocol_version": constants.PROTOCOL_VERSION,
4137
      "config_version": constants.CONFIG_VERSION,
4138
      "os_api_version": max(constants.OS_API_VERSIONS),
4139
      "export_version": constants.EXPORT_VERSION,
4140
      "architecture": (platform.architecture()[0], platform.machine()),
4141
      "name": cluster.cluster_name,
4142
      "master": cluster.master_node,
4143
      "default_hypervisor": cluster.enabled_hypervisors[0],
4144
      "enabled_hypervisors": cluster.enabled_hypervisors,
4145
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4146
                        for hypervisor_name in cluster.enabled_hypervisors]),
4147
      "os_hvp": os_hvp,
4148
      "beparams": cluster.beparams,
4149
      "osparams": cluster.osparams,
4150
      "nicparams": cluster.nicparams,
4151
      "candidate_pool_size": cluster.candidate_pool_size,
4152
      "master_netdev": cluster.master_netdev,
4153
      "volume_group_name": cluster.volume_group_name,
4154
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4155
      "file_storage_dir": cluster.file_storage_dir,
4156
      "maintain_node_health": cluster.maintain_node_health,
4157
      "ctime": cluster.ctime,
4158
      "mtime": cluster.mtime,
4159
      "uuid": cluster.uuid,
4160
      "tags": list(cluster.GetTags()),
4161
      "uid_pool": cluster.uid_pool,
4162
      "default_iallocator": cluster.default_iallocator,
4163
      "reserved_lvs": cluster.reserved_lvs,
4164
      }
4165

    
4166
    return result
4167

    
4168

    
4169
class LUQueryConfigValues(NoHooksLU):
4170
  """Return configuration values.
4171

4172
  """
4173
  _OP_PARAMS = [_POutputFields]
4174
  REQ_BGL = False
4175
  _FIELDS_DYNAMIC = utils.FieldSet()
4176
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4177
                                  "watcher_pause")
4178

    
4179
  def CheckArguments(self):
4180
    _CheckOutputFields(static=self._FIELDS_STATIC,
4181
                       dynamic=self._FIELDS_DYNAMIC,
4182
                       selected=self.op.output_fields)
4183

    
4184
  def ExpandNames(self):
4185
    self.needed_locks = {}
4186

    
4187
  def Exec(self, feedback_fn):
4188
    """Dump a representation of the cluster config to the standard output.
4189

4190
    """
4191
    values = []
4192
    for field in self.op.output_fields:
4193
      if field == "cluster_name":
4194
        entry = self.cfg.GetClusterName()
4195
      elif field == "master_node":
4196
        entry = self.cfg.GetMasterNode()
4197
      elif field == "drain_flag":
4198
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4199
      elif field == "watcher_pause":
4200
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4201
      else:
4202
        raise errors.ParameterError(field)
4203
      values.append(entry)
4204
    return values
4205

    
4206

    
4207
class LUActivateInstanceDisks(NoHooksLU):
4208
  """Bring up an instance's disks.
4209

4210
  """
4211
  _OP_PARAMS = [
4212
    _PInstanceName,
4213
    ("ignore_size", False, _TBool),
4214
    ]
4215
  REQ_BGL = False
4216

    
4217
  def ExpandNames(self):
4218
    self._ExpandAndLockInstance()
4219
    self.needed_locks[locking.LEVEL_NODE] = []
4220
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4221

    
4222
  def DeclareLocks(self, level):
4223
    if level == locking.LEVEL_NODE:
4224
      self._LockInstancesNodes()
4225

    
4226
  def CheckPrereq(self):
4227
    """Check prerequisites.
4228

4229
    This checks that the instance is in the cluster.
4230

4231
    """
4232
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4233
    assert self.instance is not None, \
4234
      "Cannot retrieve locked instance %s" % self.op.instance_name
4235
    _CheckNodeOnline(self, self.instance.primary_node)
4236

    
4237
  def Exec(self, feedback_fn):
4238
    """Activate the disks.
4239

4240
    """
4241
    disks_ok, disks_info = \
4242
              _AssembleInstanceDisks(self, self.instance,
4243
                                     ignore_size=self.op.ignore_size)
4244
    if not disks_ok:
4245
      raise errors.OpExecError("Cannot activate block devices")
4246

    
4247
    return disks_info
4248

    
4249

    
4250
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


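# Illustrative sketch (not part of the Ganeti code paths above): the
# two-pass activation used by _AssembleInstanceDisks can be summarised
# with plain data structures.  The helper below assumes a caller-provided
# "assemble_fn(node, node_disk, as_primary)" callback returning an error
# message or None; the name and signature are hypothetical and exist only
# to illustrate the secondary-then-primary ordering.

def _ExampleTwoPassAssemble(node_trees, primary_node, assemble_fn):
  """Sketch of the secondary-then-primary assembly ordering.

  @param node_trees: list (one entry per disk) of lists of
      (node, node_disk) pairs
  @param primary_node: name of the primary node
  @param assemble_fn: callable(node, node_disk, as_primary) returning an
      error message or None

  """
  ok = True
  # pass 1: bring every node up in secondary mode first, so the mirrors
  # can connect before any side is switched to primary
  for tree in node_trees:
    for node, node_disk in tree:
      if assemble_fn(node, node_disk, False):
        ok = False
  # pass 2: only the primary node is switched to primary mode
  for tree in node_trees:
    for node, node_disk in tree:
      if node == primary_node and assemble_fn(node, node_disk, True):
        ok = False
  return ok

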
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


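# Illustrative sketch (hypothetical helper, not used above): the error
# accounting rule of _ShutdownInstanceDisks in isolation.  A failure only
# makes the aggregate result False when it happens on a non-primary node,
# or when the caller did not ask for primary-node errors to be ignored.

def _ExampleShutdownResult(failed_nodes, primary_node, ignore_primary):
  """Sketch: decide the aggregate result from per-node shutdown failures.

  @param failed_nodes: list of node names on which the shutdown RPC failed
  @param primary_node: name of the instance's primary node
  @param ignore_primary: whether failures on the primary are tolerated

  """
  for node in failed_nodes:
    if not ignore_primary or node != primary_node:
      return False
  return True

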
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


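# Illustrative sketch (hypothetical, standalone): the validation pattern
# shared by _CheckNodeFreeMemory and _CheckNodesFreeDisk, reduced to its
# core.  "payload" stands for the dictionary returned by the node RPC and
# "key" for the resource field ("memory_free" or "vg_free"); the function
# name is invented for this example and is not called anywhere.

def _ExampleCheckFreeResource(payload, key, requested, node):
  """Sketch: validate an RPC payload field against a requested amount.

  """
  value = payload.get(key, None)
  if not isinstance(value, int):
    # a missing or malformed field means we cannot trust the node's answer
    raise errors.OpPrereqError("Can't compute free %s on node %s, result"
                               " was '%s'" % (key, node, value),
                               errors.ECODE_ENVIRON)
  if requested > value:
    raise errors.OpPrereqError("Not enough %s on node %s: needed %s MiB,"
                               " available %s MiB" %
                               (key, node, requested, value),
                               errors.ECODE_NORES)

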
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


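# Illustrative sketch (not the real opcode machinery): every _OP_PARAMS
# entry used by the LUs in this module is a (name, default, check) triple.
# A minimal validation loop over such triples could look like the
# hypothetical helper below; the actual processing lives in the opcode/LU
# infrastructure, not in this function.

def _ExampleCheckOpParams(op_params, values):
  """Sketch: apply (name, default, check) triples to a value dict.

  @param op_params: list of (name, default, check_fn) triples
  @param values: dict of supplied parameter values

  """
  result = {}
  for name, default, check_fn in op_params:
    if name in values:
      value = values[name]
    elif default is _NoDefault:
      raise errors.OpPrereqError("Required parameter '%s' missing" % name,
                                 errors.ECODE_INVAL)
    elif callable(default):
      # callable defaults (e.g. _EmptyDict) produce fresh containers
      value = default()
    else:
      value = default
    if not check_fn(value):
      raise errors.OpPrereqError("Parameter '%s' fails validation" % name,
                                 errors.ECODE_INVAL)
    result[name] = value
  return result

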
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, _TBool),
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", _NoDefault, _TNonEmptyString),
    ("ip_check", False, _TBool),
    ("name_check", True, _TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
      new_name = hostinfo.name
      if (self.op.ip_check and
          netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostinfo.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, _TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


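# Illustrative sketch (an alternative formulation, not the code used by
# LUQueryInstances above): simple per-field computations can also be
# expressed as a dispatch table from field name to callback, which keeps
# the elif chain short.  The table and function below are hypothetical
# and cover only a few of the simple static fields.

_EXAMPLE_INSTANCE_FIELDS = {
  "pnode": lambda inst: inst.primary_node,
  "snodes": lambda inst: list(inst.secondary_nodes),
  "admin_state": lambda inst: inst.admin_up,
  "disk_template": lambda inst: inst.disk_template,
  }


def _ExampleComputeFields(instance, fields):
  """Sketch: compute one output row via a field-to-callback table.

  """
  return [_EXAMPLE_INSTANCE_FIELDS[field](instance) for field in fields]

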
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, _TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, _TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", _NoDefault, _TNonEmptyString),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters are accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

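  # Illustrative summary of the disk-mode sequence driven by _ExecMigration
  # above, assuming source node A and target node B:
  #
  #   before:                 A primary, B secondary (single-master)
  #   _GoReconnect(True):     both nodes may be primary (dual-master)
  #   call_instance_migrate:  the running instance moves from A to B
  #   _EnsureSecondary(A) and
  #   _GoReconnect(False):    A secondary, B primary (single-master again)
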
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
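
# Illustrative sketch (assumed example values): for a 1024 MB disk,
# _GenerateDRBD8Branch above returns a device tree of the form
#
#   LD_DRBD8  size=1024  logical_id=(primary, secondary, port,
#                                    p_minor, s_minor, shared_secret)
#     +- LD_LV  size=1024  logical_id=(vgname, names[0])   # data volume
#     +- LD_LV  size=128   logical_id=(vgname, names[1])   # DRBD metadata
#
# where the names typically come from _GenerateUniqueNames with "_data" and
# "_meta" suffixes, as done in _GenerateDiskTemplate below.

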
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
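
# Illustrative example: for an instance named "instance1.example.com" the
# text above is "originstname+instance1.example.com", which is attached to
# the instance's volumes as an LVM tag.

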
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
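
# Worked example (illustrative): for disks=[{"size": 1024}, {"size": 1024}]
# the requirement computed above is 2048 MB for DT_PLAIN and
# (1024 + 128) * 2 = 2304 MB for DT_DRBD8, while DT_DISKLESS and DT_FILE
# need no space in the volume group.

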
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
6515

    
6516

    
6517
class LUCreateInstance(LogicalUnit):
6518
  """Create an instance.
6519

6520
  """
6521
  HPATH = "instance-add"
6522
  HTYPE = constants.HTYPE_INSTANCE
6523
  _OP_PARAMS = [
6524
    _PInstanceName,
6525
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6526
    ("start", True, _TBool),
6527
    ("wait_for_sync", True, _TBool),
6528
    ("ip_check", True, _TBool),
6529
    ("name_check", True, _TBool),
6530
    ("disks", _NoDefault, _TListOf(_TDict)),
6531
    ("nics", _NoDefault, _TListOf(_TDict)),
6532
    ("hvparams", _EmptyDict, _TDict),
6533
    ("beparams", _EmptyDict, _TDict),
6534
    ("osparams", _EmptyDict, _TDict),
6535
    ("no_install", None, _TMaybeBool),
6536
    ("os_type", None, _TMaybeString),
6537
    ("force_variant", False, _TBool),
6538
    ("source_handshake", None, _TOr(_TList, _TNone)),
6539
    ("source_x509_ca", None, _TMaybeString),
6540
    ("source_instance_name", None, _TMaybeString),
6541
    ("src_node", None, _TMaybeString),
6542
    ("src_path", None, _TMaybeString),
6543
    ("pnode", None, _TMaybeString),
6544
    ("snode", None, _TMaybeString),
6545
    ("iallocator", None, _TMaybeString),
6546
    ("hypervisor", None, _TMaybeString),
6547
    ("disk_template", _NoDefault, _CheckDiskTemplate),
6548
    ("identify_defaults", False, _TBool),
6549
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6550
    ("file_storage_dir", None, _TMaybeString),
6551
    ]
6552
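  # Illustrative gloss: each _OP_PARAMS entry above is a (name, default,
  # check-function) triple; e.g. ("start", True, _TBool) declares a boolean
  # "start" parameter that defaults to True.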
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.HostInfo.NormalizeName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_oss:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
      self.source_instance_name = netutils.GetHostInfo(norm_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
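
  # Illustrative note: for a DRBD8 request the allocator returns two node
  # names (e.g. ["node1.example.com", "node2.example.com"]); result[0] is
  # used as the primary and result[1] as the secondary above.
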
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
6971
    """Check prerequisites.
6972

6973
    """
6974
    if self.op.mode == constants.INSTANCE_IMPORT:
6975
      export_info = self._ReadExportInfo()
6976
      self._ReadExportParams(export_info)
6977

    
6978
    _CheckDiskTemplate(self.op.disk_template)
6979

    
6980
    if (not self.cfg.GetVGName() and
6981
        self.op.disk_template not in constants.DTS_NOT_LVM):
6982
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6983
                                 " instances", errors.ECODE_STATE)
6984

    
6985
    if self.op.hypervisor is None:
6986
      self.op.hypervisor = self.cfg.GetHypervisorType()
6987

    
6988
    cluster = self.cfg.GetClusterInfo()
6989
    enabled_hvs = cluster.enabled_hypervisors
6990
    if self.op.hypervisor not in enabled_hvs:
6991
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6992
                                 " cluster (%s)" % (self.op.hypervisor,
6993
                                  ",".join(enabled_hvs)),
6994
                                 errors.ECODE_STATE)
6995

    
6996
    # check hypervisor parameter syntax (locally)
6997
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6998
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6999
                                      self.op.hvparams)
7000
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7001
    hv_type.CheckParameterSyntax(filled_hvp)
7002
    self.hv_full = filled_hvp
7003
    # check that we don't specify global parameters on an instance
7004
    _CheckGlobalHvParams(self.op.hvparams)
7005

    
7006
    # fill and remember the beparams dict
7007
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7008
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7009

    
7010
    # build os parameters
7011
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7012

    
7013
    # now that hvp/bep are in final format, let's reset to defaults,
7014
    # if told to do so
7015
    if self.op.identify_defaults:
7016
      self._RevertToDefaults(cluster)
7017

    
7018
    # NIC buildup
7019
    self.nics = []
7020
    for idx, nic in enumerate(self.op.nics):
7021
      nic_mode_req = nic.get("mode", None)
7022
      nic_mode = nic_mode_req
7023
      if nic_mode is None:
7024
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7025

    
7026
      # in routed mode, for the first nic, the default ip is 'auto'
7027
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7028
        default_ip_mode = constants.VALUE_AUTO
7029
      else:
7030
        default_ip_mode = constants.VALUE_NONE
7031

    
7032
      # ip validity checks
7033
      ip = nic.get("ip", default_ip_mode)
7034
      if ip is None or ip.lower() == constants.VALUE_NONE:
7035
        nic_ip = None
7036
      elif ip.lower() == constants.VALUE_AUTO:
7037
        if not self.op.name_check:
7038
          raise errors.OpPrereqError("IP address set to auto but name checks"
7039
                                     " have been skipped. Aborting.",
7040
                                     errors.ECODE_INVAL)
7041
        nic_ip = self.hostname1.ip
7042
      else:
7043
        if not netutils.IsValidIP4(ip):
7044
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7045
                                     " like a valid IP" % ip,
7046
                                     errors.ECODE_INVAL)
7047
        nic_ip = ip
7048

    
7049
      # TODO: check the ip address for uniqueness
7050
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7051
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7052
                                   errors.ECODE_INVAL)
7053

    
7054
      # MAC address verification
7055
      mac = nic.get("mac", constants.VALUE_AUTO)
7056
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7057
        mac = utils.NormalizeAndValidateMac(mac)
7058

    
7059
        try:
7060
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7061
        except errors.ReservationError:
7062
          raise errors.OpPrereqError("MAC address %s already in use"
7063
                                     " in cluster" % mac,
7064
                                     errors.ECODE_NOTUNIQUE)
7065

    
7066
      # bridge verification
7067
      bridge = nic.get("bridge", None)
7068
      link = nic.get("link", None)
7069
      if bridge and link:
7070
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7071
                                   " at the same time", errors.ECODE_INVAL)
7072
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7073
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7074
                                   errors.ECODE_INVAL)
7075
      elif bridge:
7076
        link = bridge
7077

    
7078
      nicparams = {}
7079
      if nic_mode_req:
7080
        nicparams[constants.NIC_MODE] = nic_mode_req
7081
      if link:
7082
        nicparams[constants.NIC_LINK] = link
7083

    
7084
      check_params = cluster.SimpleFillNIC(nicparams)
7085
      objects.NIC.CheckParameterSyntax(check_params)
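      # check_params (the dict filled with cluster defaults) is used only for
      # the syntax check above; the NIC object below keeps just the explicitly
      # requested nicparams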
7086
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7087

    
7088
    # disk checks/pre-build
7089
    self.disks = []
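    # each entry appended below is normalized to {"size": <int>, "mode": <mode>},
    # plus an optional "adopt" key naming an existing LV to take over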
7090
    for disk in self.op.disks:
7091
      mode = disk.get("mode", constants.DISK_RDWR)
7092
      if mode not in constants.DISK_ACCESS_SET:
7093
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7094
                                   mode, errors.ECODE_INVAL)
7095
      size = disk.get("size", None)
7096
      if size is None:
7097
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7098
      try:
7099
        size = int(size)
7100
      except (TypeError, ValueError):
7101
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7102
                                   errors.ECODE_INVAL)
7103
      new_disk = {"size": size, "mode": mode}
7104
      if "adopt" in disk:
7105
        new_disk["adopt"] = disk["adopt"]
7106
      self.disks.append(new_disk)
7107

    
7108
    if self.op.mode == constants.INSTANCE_IMPORT:
7109

    
7110
      # Check that the new instance doesn't have less disks than the export
7111
      instance_disks = len(self.disks)
7112
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7113
      if instance_disks < export_disks:
7114
        raise errors.OpPrereqError("Not enough disks to import."
7115
                                   " (instance: %d, export: %d)" %
7116
                                   (instance_disks, export_disks),
7117
                                   errors.ECODE_INVAL)
7118

    
7119
      disk_images = []
7120
      for idx in range(export_disks):
7121
        option = 'disk%d_dump' % idx
7122
        if export_info.has_option(constants.INISECT_INS, option):
7123
          # FIXME: are the old os-es, disk sizes, etc. useful?
7124
          export_name = export_info.get(constants.INISECT_INS, option)
7125
          image = utils.PathJoin(self.op.src_path, export_name)
7126
          disk_images.append(image)
7127
        else:
7128
          disk_images.append(False)
7129

    
7130
      self.src_images = disk_images
7131

    
7132
      old_name = export_info.get(constants.INISECT_INS, 'name')
7133
      try:
7134
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7135
      except (TypeError, ValueError), err:
7136
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7137
                                   " an integer: %s" % str(err),
7138
                                   errors.ECODE_STATE)
7139
      if self.op.instance_name == old_name:
7140
        for idx, nic in enumerate(self.nics):
7141
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7142
            nic_mac_ini = 'nic%d_mac' % idx
7143
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7144

    
7145
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7146

    
7147
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7148
    if self.op.ip_check:
7149
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7150
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7151
                                   (self.check_ip, self.op.instance_name),
7152
                                   errors.ECODE_NOTUNIQUE)
7153

    
7154
    #### mac address generation
7155
    # By generating here the mac address both the allocator and the hooks get
7156
    # the real final mac address rather than the 'auto' or 'generate' value.
7157
    # There is a race condition between the generation and the instance object
7158
    # creation, which means that we know the mac is valid now, but we're not
7159
    # sure it will be when we actually add the instance. If things go bad
7160
    # adding the instance will abort because of a duplicate mac, and the
7161
    # creation job will fail.
7162
    for nic in self.nics:
7163
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7164
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7165

    
7166
    #### allocator run
7167

    
7168
    if self.op.iallocator is not None:
7169
      self._RunAllocator()
7170

    
7171
    #### node related checks
7172

    
7173
    # check primary node
7174
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7175
    assert self.pnode is not None, \
7176
      "Cannot retrieve locked node %s" % self.op.pnode
7177
    if pnode.offline:
7178
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7179
                                 pnode.name, errors.ECODE_STATE)
7180
    if pnode.drained:
7181
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7182
                                 pnode.name, errors.ECODE_STATE)
7183

    
7184
    self.secondaries = []
7185

    
7186
    # mirror node verification
7187
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7188
      if self.op.snode == pnode.name:
7189
        raise errors.OpPrereqError("The secondary node cannot be the"
7190
                                   " primary node.", errors.ECODE_INVAL)
7191
      _CheckNodeOnline(self, self.op.snode)
7192
      _CheckNodeNotDrained(self, self.op.snode)
7193
      self.secondaries.append(self.op.snode)
7194

    
7195
    nodenames = [pnode.name] + self.secondaries
7196

    
7197
    req_size = _ComputeDiskSize(self.op.disk_template,
7198
                                self.disks)
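    # _ComputeDiskSize may return None for templates where no up-front size
    # check applies; in that case the free-disk check below is skipped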
7199

    
7200
    # Check lv size requirements, if not adopting
7201
    if req_size is not None and not self.adopt_disks:
7202
      _CheckNodesFreeDisk(self, nodenames, req_size)
7203

    
7204
    if self.adopt_disks: # instead, we must check the adoption data
7205
      all_lvs = set([i["adopt"] for i in self.disks])
7206
      if len(all_lvs) != len(self.disks):
7207
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7208
                                   errors.ECODE_INVAL)
7209
      for lv_name in all_lvs:
7210
        try:
7211
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7212
        except errors.ReservationError:
7213
          raise errors.OpPrereqError("LV named %s used by another instance" %
7214
                                     lv_name, errors.ECODE_NOTUNIQUE)
7215

    
7216
      node_lvs = self.rpc.call_lv_list([pnode.name],
7217
                                       self.cfg.GetVGName())[pnode.name]
7218
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7219
      node_lvs = node_lvs.payload
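      # node_lvs now maps each LV name to an attribute tuple; element 0 is the
      # size and element 2 the "online" flag, both used below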
7220
      delta = all_lvs.difference(node_lvs.keys())
7221
      if delta:
7222
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7223
                                   utils.CommaJoin(delta),
7224
                                   errors.ECODE_INVAL)
7225
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7226
      if online_lvs:
7227
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7228
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7229
                                   errors.ECODE_STATE)
7230
      # update the size of disk based on what is found
7231
      for dsk in self.disks:
7232
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7233

    
7234
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7235

    
7236
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7237
    # check OS parameters (remotely)
7238
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7239

    
7240
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7241

    
7242
    # memory check on primary node
7243
    if self.op.start:
7244
      _CheckNodeFreeMemory(self, self.pnode.name,
7245
                           "creating instance %s" % self.op.instance_name,
7246
                           self.be_full[constants.BE_MEMORY],
7247
                           self.op.hypervisor)
7248

    
7249
    self.dry_run_result = list(nodenames)
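    # for dry-run invocations, the result is simply the list of nodes that
    # would host the instance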
7250

    
7251
  def Exec(self, feedback_fn):
7252
    """Create and add the instance to the cluster.
7253

7254
    """
7255
    instance = self.op.instance_name
7256
    pnode_name = self.pnode.name
7257

    
7258
    ht_kind = self.op.hypervisor
7259
    if ht_kind in constants.HTS_REQ_PORT:
7260
      network_port = self.cfg.AllocatePort()
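      # AllocatePort reserves a cluster-wide unique port number for this
      # instance (only hypervisors in HTS_REQ_PORT need one)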
7261
    else:
7262
      network_port = None
7263

    
7264
    if constants.ENABLE_FILE_STORAGE:
7265
      # this is needed because os.path.join does not accept None arguments
7266
      if self.op.file_storage_dir is None:
7267
        string_file_storage_dir = ""
7268
      else:
7269
        string_file_storage_dir = self.op.file_storage_dir
7270

    
7271
      # build the full file storage dir path
7272
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7273
                                        string_file_storage_dir, instance)
7274
    else:
7275
      file_storage_dir = ""
7276

    
7277
    disks = _GenerateDiskTemplate(self,
7278
                                  self.op.disk_template,
7279
                                  instance, pnode_name,
7280
                                  self.secondaries,
7281
                                  self.disks,
7282
                                  file_storage_dir,
7283
                                  self.op.file_driver,
7284
                                  0)
7285

    
7286
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7287
                            primary_node=pnode_name,
7288
                            nics=self.nics, disks=disks,
7289
                            disk_template=self.op.disk_template,
7290
                            admin_up=False,
7291
                            network_port=network_port,
7292
                            beparams=self.op.beparams,
7293
                            hvparams=self.op.hvparams,
7294
                            hypervisor=self.op.hypervisor,
7295
                            osparams=self.op.osparams,
7296
                            )
7297

    
7298
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
7311
      feedback_fn("* creating instance disks...")
7312
      try:
7313
        _CreateDisks(self, iobj)
7314
      except errors.OpExecError:
7315
        self.LogWarning("Device creation failed, reverting...")
7316
        try:
7317
          _RemoveDisks(self, iobj)
7318
        finally:
7319
          self.cfg.ReleaseDRBDMinors(instance)
7320
          raise
7321

    
7322
    feedback_fn("adding instance %s to cluster config" % instance)
7323

    
7324
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7325

    
7326
    # Declare that we don't want to remove the instance lock anymore, as we've
7327
    # added the instance to the config
7328
    del self.remove_locks[locking.LEVEL_INSTANCE]
7329
    # Unlock all the nodes
7330
    if self.op.mode == constants.INSTANCE_IMPORT:
7331
      nodes_keep = [self.op.src_node]
7332
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7333
                       if node != self.op.src_node]
7334
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7335
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7336
    else:
7337
      self.context.glm.release(locking.LEVEL_NODE)
7338
      del self.acquired_locks[locking.LEVEL_NODE]
7339

    
7340
    if self.op.wait_for_sync:
7341
      disk_abort = not _WaitForSync(self, iobj)
7342
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7343
      # make sure the disks are not degraded (still sync-ing is ok)
7344
      time.sleep(15)
7345
      feedback_fn("* checking mirrors status")
7346
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7347
    else:
7348
      disk_abort = False
7349

    
7350
    if disk_abort:
7351
      _RemoveDisks(self, iobj)
7352
      self.cfg.RemoveInstance(iobj.name)
7353
      # Make sure the instance lock gets removed
7354
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7355
      raise errors.OpExecError("There are some degraded disks for"
7356
                               " this instance")
7357

    
7358
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7359
      if self.op.mode == constants.INSTANCE_CREATE:
7360
        if not self.op.no_install:
7361
          feedback_fn("* running the instance OS create scripts...")
7362
          # FIXME: pass debug option from opcode to backend
7363
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7364
                                                 self.op.debug_level)
7365
          result.Raise("Could not add os for instance %s"
7366
                       " on node %s" % (instance, pnode_name))
7367

    
7368
      elif self.op.mode == constants.INSTANCE_IMPORT:
7369
        feedback_fn("* running the instance OS import scripts...")
7370

    
7371
        transfers = []
7372

    
7373
        for idx, image in enumerate(self.src_images):
7374
          if not image:
7375
            continue
7376

    
7377
          # FIXME: pass debug option from opcode to backend
7378
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7379
                                             constants.IEIO_FILE, (image, ),
7380
                                             constants.IEIO_SCRIPT,
7381
                                             (iobj.disks[idx], idx),
7382
                                             None)
7383
          transfers.append(dt)
7384

    
7385
        import_result = \
7386
          masterd.instance.TransferInstanceData(self, feedback_fn,
7387
                                                self.op.src_node, pnode_name,
7388
                                                self.pnode.secondary_ip,
7389
                                                iobj, transfers)
7390
        if not compat.all(import_result):
7391
          self.LogWarning("Some disks for instance %s on node %s were not"
7392
                          " imported successfully" % (instance, pnode_name))
7393

    
7394
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7395
        feedback_fn("* preparing remote import...")
7396
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7397
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7398

    
7399
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7400
                                                     self.source_x509_ca,
7401
                                                     self._cds, timeouts)
7402
        if not compat.all(disk_results):
7403
          # TODO: Should the instance still be started, even if some disks
7404
          # failed to import (valid for local imports, too)?
7405
          self.LogWarning("Some disks for instance %s on node %s were not"
7406
                          " imported successfully" % (instance, pnode_name))
7407

    
7408
        # Run rename script on newly imported instance
7409
        assert iobj.name == instance
7410
        feedback_fn("Running rename script for %s" % instance)
7411
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7412
                                                   self.source_instance_name,
7413
                                                   self.op.debug_level)
7414
        if result.fail_msg:
7415
          self.LogWarning("Failed to run rename script for %s on node"
7416
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7417

    
7418
      else:
7419
        # also checked in the prereq part
7420
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7421
                                     % self.op.mode)
7422

    
7423
    if self.op.start:
7424
      iobj.admin_up = True
7425
      self.cfg.Update(iobj, feedback_fn)
7426
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7427
      feedback_fn("* starting instance...")
7428
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7429
      result.Raise("Could not start instance")
7430

    
7431
    return list(iobj.all_nodes)
7432

    
7433

    
7434
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False
7446

    
7447
  def ExpandNames(self):
7448
    self._ExpandAndLockInstance()
7449

    
7450
  def CheckPrereq(self):
7451
    """Check prerequisites.
7452

7453
    This checks that the instance is in the cluster.
7454

7455
    """
7456
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7457
    assert self.instance is not None, \
7458
      "Cannot retrieve locked instance %s" % self.op.instance_name
7459
    _CheckNodeOnline(self, self.instance.primary_node)
7460

    
7461
  def Exec(self, feedback_fn):
7462
    """Connect to the console of an instance
7463

7464
    """
7465
    instance = self.instance
7466
    node = instance.primary_node
7467

    
7468
    node_insts = self.rpc.call_instance_list([node],
7469
                                             [instance.hypervisor])[node]
7470
    node_insts.Raise("Can't get node information from %s" % node)
7471

    
7472
    if instance.name not in node_insts.payload:
7473
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7474

    
7475
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7476

    
7477
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7478
    cluster = self.cfg.GetClusterInfo()
7479
    # beparams and hvparams are passed separately, to avoid editing the
7480
    # instance and then saving the defaults in the instance itself.
7481
    hvparams = cluster.FillHV(instance)
7482
    beparams = cluster.FillBE(instance)
7483
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7484

    
7485
    # build ssh cmdline
7486
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7487

    
7488

    
7489
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("early_release", False, _TBool),
    ]
  REQ_BGL = False
7504

    
7505
  def CheckArguments(self):
7506
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7507
                                  self.op.iallocator)
7508

    
7509
  def ExpandNames(self):
7510
    self._ExpandAndLockInstance()
7511

    
7512
    if self.op.iallocator is not None:
7513
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7514

    
7515
    elif self.op.remote_node is not None:
7516
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7517
      self.op.remote_node = remote_node
7518

    
7519
      # Warning: do not remove the locking of the new secondary here
7520
      # unless DRBD8.AddChildren is changed to work in parallel;
7521
      # currently it doesn't since parallel invocations of
7522
      # FindUnusedMinor will conflict
7523
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7524
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7525

    
7526
    else:
7527
      self.needed_locks[locking.LEVEL_NODE] = []
7528
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7529

    
7530
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7531
                                   self.op.iallocator, self.op.remote_node,
7532
                                   self.op.disks, False, self.op.early_release)
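    # the positional False above is TLReplaceDisks' delay_iallocator argument:
    # for this LU the iallocator (if any) runs during CheckPrereq, not at Exec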
7533

    
7534
    self.tasklets = [self.replacer]
7535

    
7536
  def DeclareLocks(self, level):
7537
    # If we're not already locking all nodes in the set we have to declare the
7538
    # instance's primary/secondary nodes.
7539
    if (level == locking.LEVEL_NODE and
7540
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7541
      self._LockInstancesNodes()
7542

    
7543
  def BuildHooksEnv(self):
7544
    """Build hooks env.
7545

7546
    This runs on the master, the primary and all the secondaries.
7547

7548
    """
7549
    instance = self.replacer.instance
7550
    env = {
7551
      "MODE": self.op.mode,
7552
      "NEW_SECONDARY": self.op.remote_node,
7553
      "OLD_SECONDARY": instance.secondary_nodes[0],
7554
      }
7555
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7556
    nl = [
7557
      self.cfg.GetMasterNode(),
7558
      instance.primary_node,
7559
      ]
7560
    if self.op.remote_node is not None:
7561
      nl.append(self.op.remote_node)
7562
    return env, nl, nl
7563

    
7564

    
7565
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
7571
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7572
               disks, delay_iallocator, early_release):
7573
    """Initializes this class.
7574

7575
    """
7576
    Tasklet.__init__(self, lu)
7577

    
7578
    # Parameters
7579
    self.instance_name = instance_name
7580
    self.mode = mode
7581
    self.iallocator_name = iallocator_name
7582
    self.remote_node = remote_node
7583
    self.disks = disks
7584
    self.delay_iallocator = delay_iallocator
7585
    self.early_release = early_release
7586

    
7587
    # Runtime data
7588
    self.instance = None
7589
    self.new_node = None
7590
    self.target_node = None
7591
    self.other_node = None
7592
    self.remote_node_info = None
7593
    self.node_secondary_ip = None
7594

    
7595
  @staticmethod
7596
  def CheckArguments(mode, remote_node, iallocator):
7597
    """Helper function for users of this class.
7598

7599
    """
7600
    # check for valid parameter combination
7601
    if mode == constants.REPLACE_DISK_CHG:
7602
      if remote_node is None and iallocator is None:
7603
        raise errors.OpPrereqError("When changing the secondary either an"
7604
                                   " iallocator script must be used or the"
7605
                                   " new node given", errors.ECODE_INVAL)
7606

    
7607
      if remote_node is not None and iallocator is not None:
7608
        raise errors.OpPrereqError("Give either the iallocator or the new"
7609
                                   " secondary, not both", errors.ECODE_INVAL)
7610

    
7611
    elif remote_node is not None or iallocator is not None:
7612
      # Not replacing the secondary
7613
      raise errors.OpPrereqError("The iallocator and new node options can"
7614
                                 " only be used when changing the"
7615
                                 " secondary node", errors.ECODE_INVAL)
7616

    
7617
  @staticmethod
7618
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7619
    """Compute a new secondary node using an IAllocator.
7620

7621
    """
7622
    ial = IAllocator(lu.cfg, lu.rpc,
7623
                     mode=constants.IALLOCATOR_MODE_RELOC,
7624
                     name=instance_name,
7625
                     relocate_from=relocate_from)
7626

    
7627
    ial.Run(iallocator_name)
7628

    
7629
    if not ial.success:
7630
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7631
                                 " %s" % (iallocator_name, ial.info),
7632
                                 errors.ECODE_NORES)
7633

    
7634
    if len(ial.result) != ial.required_nodes:
7635
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7636
                                 " of nodes (%s), required %s" %
7637
                                 (iallocator_name,
7638
                                  len(ial.result), ial.required_nodes),
7639
                                 errors.ECODE_FAULT)
7640

    
7641
    remote_node_name = ial.result[0]
7642

    
7643
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7644
               instance_name, remote_node_name)
7645

    
7646
    return remote_node_name
7647

    
7648
  def _FindFaultyDisks(self, node_name):
7649
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7650
                                    node_name, True)
7651

    
7652
  def CheckPrereq(self):
7653
    """Check prerequisites.
7654

7655
    This checks that the instance is in the cluster.
7656

7657
    """
7658
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7659
    assert instance is not None, \
7660
      "Cannot retrieve locked instance %s" % self.instance_name
7661

    
7662
    if instance.disk_template != constants.DT_DRBD8:
7663
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7664
                                 " instances", errors.ECODE_INVAL)
7665

    
7666
    if len(instance.secondary_nodes) != 1:
7667
      raise errors.OpPrereqError("The instance has a strange layout,"
7668
                                 " expected one secondary but found %d" %
7669
                                 len(instance.secondary_nodes),
7670
                                 errors.ECODE_FAULT)
7671

    
7672
    if not self.delay_iallocator:
7673
      self._CheckPrereq2()
7674

    
7675
  def _CheckPrereq2(self):
7676
    """Check prerequisites, second part.
7677

7678
    This function should always be part of CheckPrereq. It was separated out
    and is now called from Exec instead, because during node evacuation the
    iallocator would otherwise only be called with an unmodified cluster
    model, not taking the planned changes into account.

7683
    """
7684
    instance = self.instance
7685
    secondary_node = instance.secondary_nodes[0]
7686

    
7687
    if self.iallocator_name is None:
7688
      remote_node = self.remote_node
7689
    else:
7690
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7691
                                       instance.name, instance.secondary_nodes)
7692

    
7693
    if remote_node is not None:
7694
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7695
      assert self.remote_node_info is not None, \
7696
        "Cannot retrieve locked node %s" % remote_node
7697
    else:
7698
      self.remote_node_info = None
7699

    
7700
    if remote_node == self.instance.primary_node:
7701
      raise errors.OpPrereqError("The specified node is the primary node of"
7702
                                 " the instance.", errors.ECODE_INVAL)
7703

    
7704
    if remote_node == secondary_node:
7705
      raise errors.OpPrereqError("The specified node is already the"
7706
                                 " secondary node of the instance.",
7707
                                 errors.ECODE_INVAL)
7708

    
7709
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7710
                                    constants.REPLACE_DISK_CHG):
7711
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7712
                                 errors.ECODE_INVAL)
7713

    
7714
    if self.mode == constants.REPLACE_DISK_AUTO:
7715
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7716
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7717

    
7718
      if faulty_primary and faulty_secondary:
7719
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7720
                                   " one node and can not be repaired"
7721
                                   " automatically" % self.instance_name,
7722
                                   errors.ECODE_STATE)
7723

    
7724
      if faulty_primary:
7725
        self.disks = faulty_primary
7726
        self.target_node = instance.primary_node
7727
        self.other_node = secondary_node
7728
        check_nodes = [self.target_node, self.other_node]
7729
      elif faulty_secondary:
7730
        self.disks = faulty_secondary
7731
        self.target_node = secondary_node
7732
        self.other_node = instance.primary_node
7733
        check_nodes = [self.target_node, self.other_node]
7734
      else:
7735
        self.disks = []
7736
        check_nodes = []
7737

    
7738
    else:
7739
      # Non-automatic modes
7740
      if self.mode == constants.REPLACE_DISK_PRI:
7741
        self.target_node = instance.primary_node
7742
        self.other_node = secondary_node
7743
        check_nodes = [self.target_node, self.other_node]
7744

    
7745
      elif self.mode == constants.REPLACE_DISK_SEC:
7746
        self.target_node = secondary_node
7747
        self.other_node = instance.primary_node
7748
        check_nodes = [self.target_node, self.other_node]
7749

    
7750
      elif self.mode == constants.REPLACE_DISK_CHG:
7751
        self.new_node = remote_node
7752
        self.other_node = instance.primary_node
7753
        self.target_node = secondary_node
7754
        check_nodes = [self.new_node, self.other_node]
7755

    
7756
        _CheckNodeNotDrained(self.lu, remote_node)
7757

    
7758
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7759
        assert old_node_info is not None
7760
        if old_node_info.offline and not self.early_release:
7761
          # doesn't make sense to delay the release
7762
          self.early_release = True
7763
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7764
                          " early-release mode", secondary_node)
7765

    
7766
      else:
7767
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7768
                                     self.mode)
7769

    
7770
      # If not specified all disks should be replaced
7771
      if not self.disks:
7772
        self.disks = range(len(self.instance.disks))
7773

    
7774
    for node in check_nodes:
7775
      _CheckNodeOnline(self.lu, node)
7776

    
7777
    # Check whether disks are valid
7778
    for disk_idx in self.disks:
7779
      instance.FindDisk(disk_idx)
7780

    
7781
    # Get secondary node IP addresses
7782
    node_2nd_ip = {}
7783

    
7784
    for node_name in [self.target_node, self.other_node, self.new_node]:
7785
      if node_name is not None:
7786
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7787

    
7788
    self.node_secondary_ip = node_2nd_ip
7789

    
7790
  def Exec(self, feedback_fn):
7791
    """Execute disk replacement.
7792

7793
    This dispatches the disk replacement to the appropriate handler.
7794

7795
    """
7796
    if self.delay_iallocator:
7797
      self._CheckPrereq2()
7798

    
7799
    if not self.disks:
7800
      feedback_fn("No disks need replacement")
7801
      return
7802

    
7803
    feedback_fn("Replacing disk(s) %s for %s" %
7804
                (utils.CommaJoin(self.disks), self.instance.name))
7805

    
7806
    activate_disks = (not self.instance.admin_up)
7807

    
7808
    # Activate the instance disks if we're replacing them on a down instance
7809
    if activate_disks:
7810
      _StartInstanceDisks(self.lu, self.instance, True)
7811

    
7812
    try:
7813
      # Should we replace the secondary node?
7814
      if self.new_node is not None:
7815
        fn = self._ExecDrbd8Secondary
7816
      else:
7817
        fn = self._ExecDrbd8DiskOnly
7818

    
7819
      return fn(feedback_fn)
7820

    
7821
    finally:
7822
      # Deactivate the instance disks if we're replacing them on a
7823
      # down instance
7824
      if activate_disks:
7825
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7826

    
7827
  def _CheckVolumeGroup(self, nodes):
7828
    self.lu.LogInfo("Checking volume groups")
7829

    
7830
    vgname = self.cfg.GetVGName()
7831

    
7832
    # Make sure volume group exists on all involved nodes
7833
    results = self.rpc.call_vg_list(nodes)
7834
    if not results:
7835
      raise errors.OpExecError("Can't list volume groups on the nodes")
7836

    
7837
    for node in nodes:
7838
      res = results[node]
7839
      res.Raise("Error checking node %s" % node)
7840
      if vgname not in res.payload:
7841
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7842
                                 (vgname, node))
7843

    
7844
  def _CheckDisksExistence(self, nodes):
7845
    # Check disk existence
7846
    for idx, dev in enumerate(self.instance.disks):
7847
      if idx not in self.disks:
7848
        continue
7849

    
7850
      for node in nodes:
7851
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7852
        self.cfg.SetDiskID(dev, node)
7853

    
7854
        result = self.rpc.call_blockdev_find(node, dev)
7855

    
7856
        msg = result.fail_msg
7857
        if msg or not result.payload:
7858
          if not msg:
7859
            msg = "disk not found"
7860
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7861
                                   (idx, node, msg))
7862

    
7863
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7864
    for idx, dev in enumerate(self.instance.disks):
7865
      if idx not in self.disks:
7866
        continue
7867

    
7868
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7869
                      (idx, node_name))
7870

    
7871
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7872
                                   ldisk=ldisk):
7873
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7874
                                 " replace disks for instance %s" %
7875
                                 (node_name, self.instance.name))
7876

    
7877
  def _CreateNewStorage(self, node_name):
7878
    vgname = self.cfg.GetVGName()
7879
    iv_names = {}
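    # maps dev.iv_name -> (drbd device, its old LV children, the new LVs)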
7880

    
7881
    for idx, dev in enumerate(self.instance.disks):
7882
      if idx not in self.disks:
7883
        continue
7884

    
7885
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7886

    
7887
      self.cfg.SetDiskID(dev, node_name)
7888

    
7889
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7890
      names = _GenerateUniqueNames(self.lu, lv_names)
7891

    
7892
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7893
                             logical_id=(vgname, names[0]))
7894
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7895
                             logical_id=(vgname, names[1]))
7896

    
7897
      new_lvs = [lv_data, lv_meta]
7898
      old_lvs = dev.children
7899
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7900

    
7901
      # we pass force_create=True to force the LVM creation
7902
      for new_lv in new_lvs:
7903
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7904
                        _GetInstanceInfoText(self.instance), False)
7905

    
7906
    return iv_names
7907

    
7908
  def _CheckDevices(self, node_name, iv_names):
7909
    for name, (dev, _, _) in iv_names.iteritems():
7910
      self.cfg.SetDiskID(dev, node_name)
7911

    
7912
      result = self.rpc.call_blockdev_find(node_name, dev)
7913

    
7914
      msg = result.fail_msg
7915
      if msg or not result.payload:
7916
        if not msg:
7917
          msg = "disk not found"
7918
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7919
                                 (name, msg))
7920

    
7921
      if result.payload.is_degraded:
7922
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7923

    
7924
  def _RemoveOldStorage(self, node_name, iv_names):
7925
    for name, (_, old_lvs, _) in iv_names.iteritems():
7926
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7927

    
7928
      for lv in old_lvs:
7929
        self.cfg.SetDiskID(lv, node_name)
7930

    
7931
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7932
        if msg:
7933
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7934
                             hint="remove unused LVs manually")
7935

    
7936
  def _ReleaseNodeLock(self, node_name):
7937
    """Releases the lock for a given node."""
7938
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7939

    
7940
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7941
    """Replace a disk on the primary or secondary for DRBD 8.
7942

7943
    The algorithm for replace is quite complicated:
7944

7945
      1. for each disk to be replaced:
7946

7947
        1. create new LVs on the target node with unique names
7948
        1. detach old LVs from the drbd device
7949
        1. rename old LVs to name_replaced.<time_t>
7950
        1. rename new LVs to old LVs
7951
        1. attach the new LVs (with the old names now) to the drbd device
7952

7953
      1. wait for sync across all devices
7954

7955
      1. for each modified disk:
7956

7957
        1. remove old LVs (which have the name name_replaces.<time_t>)
7958

7959
    Failures are not very well handled.
7960

7961
    """
7962
    steps_total = 6
7963

    
7964
    # Step: check device activation
7965
    self.lu.LogStep(1, steps_total, "Check device existence")
7966
    self._CheckDisksExistence([self.other_node, self.target_node])
7967
    self._CheckVolumeGroup([self.target_node, self.other_node])
7968

    
7969
    # Step: check other node consistency
7970
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7971
    self._CheckDisksConsistency(self.other_node,
7972
                                self.other_node == self.instance.primary_node,
7973
                                False)
7974

    
7975
    # Step: create new storage
7976
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7977
    iv_names = self._CreateNewStorage(self.target_node)
7978

    
7979
    # Step: for each lv, detach+rename*2+attach
7980
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7981
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7982
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7983

    
7984
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7985
                                                     old_lvs)
7986
      result.Raise("Can't detach drbd from local storage on node"
7987
                   " %s for device %s" % (self.target_node, dev.iv_name))
7988
      #dev.children = []
7989
      #cfg.Update(instance)
7990

    
7991
      # ok, we created the new LVs, so now we know we have the needed
7992
      # storage; as such, we proceed on the target node to rename
7993
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7994
      # using the assumption that logical_id == physical_id (which in
7995
      # turn is the unique_id on that node)
7996

    
7997
      # FIXME(iustin): use a better name for the replaced LVs
7998
      temp_suffix = int(time.time())
7999
      ren_fn = lambda d, suff: (d.physical_id[0],
8000
                                d.physical_id[1] + "_replaced-%s" % suff)
8001

    
8002
      # Build the rename list based on what LVs exist on the node
8003
      rename_old_to_new = []
8004
      for to_ren in old_lvs:
8005
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8006
        if not result.fail_msg and result.payload:
8007
          # device exists
8008
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8009

    
8010
      self.lu.LogInfo("Renaming the old LVs on the target node")
8011
      result = self.rpc.call_blockdev_rename(self.target_node,
8012
                                             rename_old_to_new)
8013
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8014

    
8015
      # Now we rename the new LVs to the old LVs
8016
      self.lu.LogInfo("Renaming the new LVs on the target node")
8017
      rename_new_to_old = [(new, old.physical_id)
8018
                           for old, new in zip(old_lvs, new_lvs)]
8019
      result = self.rpc.call_blockdev_rename(self.target_node,
8020
                                             rename_new_to_old)
8021
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8022

    
8023
      for old, new in zip(old_lvs, new_lvs):
8024
        new.logical_id = old.logical_id
8025
        self.cfg.SetDiskID(new, self.target_node)
8026

    
8027
      for disk in old_lvs:
8028
        disk.logical_id = ren_fn(disk, temp_suffix)
8029
        self.cfg.SetDiskID(disk, self.target_node)
8030

    
8031
      # Now that the new lvs have the old name, we can add them to the device
8032
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8033
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8034
                                                  new_lvs)
8035
      msg = result.fail_msg
8036
      if msg:
8037
        for new_lv in new_lvs:
8038
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8039
                                               new_lv).fail_msg
8040
          if msg2:
8041
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8042
                               hint=("cleanup manually the unused logical"
8043
                                     "volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8045

    
8046
      dev.children = new_lvs
8047

    
8048
      self.cfg.Update(self.instance, feedback_fn)
8049

    
8050
    cstep = 5
8051
    if self.early_release:
8052
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8053
      cstep += 1
8054
      self._RemoveOldStorage(self.target_node, iv_names)
8055
      # WARNING: we release both node locks here, do not do other RPCs
8056
      # than WaitForSync to the primary node
8057
      self._ReleaseNodeLock([self.target_node, self.other_node])
8058

    
8059
    # Wait for sync
8060
    # This can fail as the old devices are degraded and _WaitForSync
8061
    # does a combined result over all disks, so we don't check its return value
8062
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8063
    cstep += 1
8064
    _WaitForSync(self.lu, self.instance)
8065

    
8066
    # Check all devices manually
8067
    self._CheckDevices(self.instance.primary_node, iv_names)
8068

    
8069
    # Step: remove old storage
8070
    if not self.early_release:
8071
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8072
      cstep += 1
8073
      self._RemoveOldStorage(self.target_node, iv_names)
8074

    
8075
  def _ExecDrbd8Secondary(self, feedback_fn):
8076
    """Replace the secondary node for DRBD 8.
8077

8078
    The algorithm for replace is quite complicated:
8079
      - for all disks of the instance:
8080
        - create new LVs on the new node with same names
8081
        - shutdown the drbd device on the old secondary
8082
        - disconnect the drbd network on the primary
8083
        - create the drbd device on the new secondary
8084
        - network attach the drbd on the primary, using an artifice:
8085
          the drbd code for Attach() will connect to the network if it
8086
          finds a device which is connected to the good local disks but
8087
          not network enabled
8088
      - wait for sync across all devices
8089
      - remove all disks from the old secondary
8090

8091
    Failures are not very well handled.
8092

8093
    """
8094
    steps_total = 6
8095

    
8096
    # Step: check device activation
8097
    self.lu.LogStep(1, steps_total, "Check device existence")
8098
    self._CheckDisksExistence([self.instance.primary_node])
8099
    self._CheckVolumeGroup([self.instance.primary_node])
8100

    
8101
    # Step: check other node consistency
8102
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8103
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8104

    
8105
    # Step: create new storage
8106
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8107
    for idx, dev in enumerate(self.instance.disks):
8108
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8109
                      (self.new_node, idx))
8110
      # we pass force_create=True to force LVM creation
8111
      for new_lv in dev.children:
8112
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8113
                        _GetInstanceInfoText(self.instance), False)
8114

    
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    
8124
    iv_names = {}
8125
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8126
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8127
                      (self.new_node, idx))
8128
      # create new devices on new_node; note that we create two IDs:
8129
      # one without port, so the drbd will be activated without
8130
      # networking information on the new node at this stage, and one
8131
      # with network, for the latter activation in step 4
8132
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8133
      if self.instance.primary_node == o_node1:
8134
        p_minor = o_minor1
8135
      else:
8136
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8137
        p_minor = o_minor2
8138

    
8139
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8140
                      p_minor, new_minor, o_secret)
8141
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8142
                    p_minor, new_minor, o_secret)
8143

    
8144
      iv_names[idx] = (dev, dev.children, new_net_id)
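      # iv_names maps disk index -> (device, old LV children, networked
      # logical_id to switch to once the old secondary is detached)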
8145
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8146
                    new_net_id)
8147
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8148
                              logical_id=new_alone_id,
8149
                              children=dev.children,
8150
                              size=dev.size)
8151
      try:
8152
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8153
                              _GetInstanceInfoText(self.instance), False)
8154
      except errors.GenericError:
8155
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8156
        raise
8157

    
8158
    # We have new devices, shutdown the drbd on the old secondary
8159
    for idx, dev in enumerate(self.instance.disks):
8160
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8161
      self.cfg.SetDiskID(dev, self.target_node)
8162
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8163
      if msg:
8164
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8165
                           "node: %s" % (idx, msg),
8166
                           hint=("Please cleanup this device manually as"
8167
                                 " soon as possible"))

    
8169
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8170
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8171
                                               self.node_secondary_ip,
8172
                                               self.instance.disks)\
8173
                                              [self.instance.primary_node]
8174

    
8175
    msg = result.fail_msg
8176
    if msg:
8177
      # detaches didn't succeed (unlikely)
8178
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8179
      raise errors.OpExecError("Can't detach the disks from the network on"
8180
                               " old node: %s" % (msg,))
8181

    
8182
    # if we managed to detach at least one, we update all the disks of
8183
    # the instance to point to the new secondary
8184
    self.lu.LogInfo("Updating instance configuration")
8185
    for dev, _, new_logical_id in iv_names.itervalues():
8186
      dev.logical_id = new_logical_id
8187
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8188

    
8189
    self.cfg.Update(self.instance, feedback_fn)
8190

    
8191
    # and now perform the drbd attach
8192
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8193
                    " (standalone => connected)")
8194
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8195
                                            self.new_node],
8196
                                           self.node_secondary_ip,
8197
                                           self.instance.disks,
8198
                                           self.instance.name,
8199
                                           False)
8200
    for to_node, to_result in result.items():
8201
      msg = to_result.fail_msg
8202
      if msg:
8203
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8204
                           to_node, msg,
8205
                           hint=("please do a gnt-instance info to see the"
8206
                                 " status of disks"))
8207
    cstep = 5
8208
    if self.early_release:
8209
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8210
      cstep += 1
8211
      self._RemoveOldStorage(self.target_node, iv_names)
8212
      # WARNING: we release all node locks here, do not do other RPCs
8213
      # than WaitForSync to the primary node
8214
      self._ReleaseNodeLock([self.instance.primary_node,
8215
                             self.target_node,
8216
                             self.new_node])
8217

    
8218
    # Wait for sync
8219
    # This can fail as the old devices are degraded and _WaitForSync
8220
    # does a combined result over all disks, so we don't check its return value
8221
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8222
    cstep += 1
8223
    _WaitForSync(self.lu, self.instance)
8224

    
8225
    # Check all devices manually
8226
    self._CheckDevices(self.instance.primary_node, iv_names)
8227

    
8228
    # Step: remove old storage
8229
    if not self.early_release:
8230
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8231
      self._RemoveOldStorage(self.target_node, iv_names)
8232

    
8233

    
8234
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("ignore_consistency", False, _TBool),
    ]
  REQ_BGL = False
8245

    
8246
  def CheckArguments(self):
8247
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8248

    
8249
    storage_type = self.op.storage_type
8250

    
8251
    if (constants.SO_FIX_CONSISTENCY not in
8252
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8253
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8254
                                 " repaired" % storage_type,
8255
                                 errors.ECODE_INVAL)
8256

    
8257
  def ExpandNames(self):
8258
    self.needed_locks = {
8259
      locking.LEVEL_NODE: [self.op.node_name],
8260
      }
8261

    
8262
  def _CheckFaultyDisks(self, instance, node_name):
8263
    """Ensure faulty disks abort the opcode or at least warn."""
8264
    try:
8265
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8266
                                  node_name, True):
8267
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8268
                                   " node '%s'" % (instance.name, node_name),
8269
                                   errors.ECODE_STATE)
8270
    except errors.OpPrereqError, err:
8271
      if self.op.ignore_consistency:
8272
        self.proc.LogWarning(str(err.args[0]))
8273
      else:
8274
        raise
8275

    
8276
  def CheckPrereq(self):
8277
    """Check prerequisites.
8278

8279
    """
8280
    # Check whether any instance on this node has faulty disks
8281
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8282
      if not inst.admin_up:
8283
        continue
8284
      check_nodes = set(inst.all_nodes)
8285
      check_nodes.discard(self.op.node_name)
8286
      for inst_node_name in check_nodes:
8287
        self._CheckFaultyDisks(inst, inst_node_name)
8288

    
8289
  def Exec(self, feedback_fn):
8290
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8291
                (self.op.name, self.op.node_name))
8292

    
8293
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8294
    result = self.rpc.call_storage_execute(self.op.node_name,
8295
                                           self.op.storage_type, st_args,
8296
                                           self.op.name,
8297
                                           constants.SO_FIX_CONSISTENCY)
8298
    result.Raise("Failed to repair storage unit '%s' on %s" %
8299
                 (self.op.name, self.op.node_name))
8300

    
8301

    
8302
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ]
  REQ_BGL = False

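  # Note: the value returned by Exec() below is a list of
  # [instance_name, new_secondary_node] pairs, computed directly when
  # remote_node is given and otherwise delegated to the selected
  # iallocator (IALLOCATOR_MODE_MEVAC).
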
  def CheckArguments(self):
8314
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8315

    
8316
  def ExpandNames(self):
8317
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8318
    self.needed_locks = locks = {}
8319
    if self.op.remote_node is None:
8320
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8321
    else:
8322
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8323
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8324

    
8325
  def Exec(self, feedback_fn):
8326
    if self.op.remote_node is not None:
8327
      instances = []
8328
      for node in self.op.nodes:
8329
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8330
      result = []
8331
      for i in instances:
8332
        if i.primary_node == self.op.remote_node:
8333
          raise errors.OpPrereqError("Node %s is the primary node of"
8334
                                     " instance %s, cannot use it as"
8335
                                     " secondary" %
8336
                                     (self.op.remote_node, i.name),
8337
                                     errors.ECODE_INVAL)
8338
        result.append([i.name, self.op.remote_node])
8339
    else:
8340
      ial = IAllocator(self.cfg, self.rpc,
8341
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8342
                       evac_nodes=self.op.nodes)
8343
      ial.Run(self.op.iallocator, validate=True)
8344
      if not ial.success:
8345
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8346
                                 errors.ECODE_NORES)
8347
      result = ial.result
8348
    return result
8349

    
8350

    
8351
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", _NoDefault, _TInt),
    ("amount", _NoDefault, _TInt),
    ("wait_for_sync", True, _TBool),
    ]
  REQ_BGL = False

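  # Each (name, default, check) tuple above declares one opcode parameter,
  # e.g. ("amount", _NoDefault, _TInt) makes "amount" a mandatory integer
  # (the size to add, in MiB). A typical request would thus look roughly
  # like disk=0, amount=1024, with wait_for_sync left at its default.
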
  def ExpandNames(self):
8366
    self._ExpandAndLockInstance()
8367
    self.needed_locks[locking.LEVEL_NODE] = []
8368
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8369

    
8370
  def DeclareLocks(self, level):
8371
    if level == locking.LEVEL_NODE:
8372
      self._LockInstancesNodes()
8373

    
8374
  def BuildHooksEnv(self):
8375
    """Build hooks env.
8376

8377
    This runs on the master, the primary and all the secondaries.
8378

8379
    """
8380
    env = {
8381
      "DISK": self.op.disk,
8382
      "AMOUNT": self.op.amount,
8383
      }
8384
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8385
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8386
    return env, nl, nl
8387

    
8388
  def CheckPrereq(self):
8389
    """Check prerequisites.
8390

8391
    This checks that the instance is in the cluster.
8392

8393
    """
8394
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8395
    assert instance is not None, \
8396
      "Cannot retrieve locked instance %s" % self.op.instance_name
8397
    nodenames = list(instance.all_nodes)
8398
    for node in nodenames:
8399
      _CheckNodeOnline(self, node)
8400

    
8401
    self.instance = instance
8402

    
8403
    if instance.disk_template not in constants.DTS_GROWABLE:
8404
      raise errors.OpPrereqError("Instance's disk layout does not support"
8405
                                 " growing.", errors.ECODE_INVAL)
8406

    
8407
    self.disk = instance.FindDisk(self.op.disk)
8408

    
8409
    if instance.disk_template != constants.DT_FILE:
8410
      # TODO: check the free disk space for file, when that feature will be
8411
      # supported
8412
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8413

    
8414
  def Exec(self, feedback_fn):
8415
    """Execute disk grow.
8416

8417
    """
8418
    instance = self.instance
8419
    disk = self.disk
8420

    
8421
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8422
    if not disks_ok:
8423
      raise errors.OpExecError("Cannot activate block device to grow")
8424

    
8425
    for node in instance.all_nodes:
8426
      self.cfg.SetDiskID(disk, node)
8427
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8428
      result.Raise("Grow request failed to node %s" % node)
8429

    
8430
      # TODO: Rewrite code to work properly
8431
      # DRBD goes into sync mode for a short amount of time after executing the
8432
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8433
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8434
      # time is a work-around.
8435
      time.sleep(5)
8436

    
8437
    disk.RecordGrow(self.op.amount)
8438
    self.cfg.Update(instance, feedback_fn)
8439
    if self.op.wait_for_sync:
8440
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8441
      if disk_abort:
8442
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8443
                             " status.\nPlease check the instance.")
8444
      if not instance.admin_up:
8445
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8446
    elif not instance.admin_up:
8447
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8448
                           " not supposed to be running because no wait for"
8449
                           " sync mode was requested.")
8450

    
8451

    
8452
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_PARAMS = [
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
    ("static", False, _TBool),
    ]
  REQ_BGL = False

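  # Shape of the value returned by Exec() below (illustrative):
  #   {"inst1.example.com": {"name": ..., "config_state": "up",
  #                          "run_state": "down", "disks": [...], ...}}
  # i.e. one dictionary entry per queried instance, keyed by name.
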
  def ExpandNames(self):
8463
    self.needed_locks = {}
8464
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8465

    
8466
    if self.op.instances:
8467
      self.wanted_names = []
8468
      for name in self.op.instances:
8469
        full_name = _ExpandInstanceName(self.cfg, name)
8470
        self.wanted_names.append(full_name)
8471
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8472
    else:
8473
      self.wanted_names = None
8474
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8475

    
8476
    self.needed_locks[locking.LEVEL_NODE] = []
8477
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8478

    
8479
  def DeclareLocks(self, level):
8480
    if level == locking.LEVEL_NODE:
8481
      self._LockInstancesNodes()
8482

    
8483
  def CheckPrereq(self):
8484
    """Check prerequisites.
8485

8486
    This only checks the optional instance list against the existing names.
8487

8488
    """
8489
    if self.wanted_names is None:
8490
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8491

    
8492
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8493
                             in self.wanted_names]
8494

    
8495
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8496
    """Returns the status of a block device
8497

8498
    """
8499
    if self.op.static or not node:
8500
      return None
8501

    
8502
    self.cfg.SetDiskID(dev, node)
8503

    
8504
    result = self.rpc.call_blockdev_find(node, dev)
8505
    if result.offline:
8506
      return None
8507

    
8508
    result.Raise("Can't compute disk status for %s" % instance_name)
8509

    
8510
    status = result.payload
8511
    if status is None:
8512
      return None
8513

    
8514
    return (status.dev_path, status.major, status.minor,
8515
            status.sync_percent, status.estimated_time,
8516
            status.is_degraded, status.ldisk_status)
8517

    
8518
  def _ComputeDiskStatus(self, instance, snode, dev):
8519
    """Compute block device status.
8520

8521
    """
8522
    if dev.dev_type in constants.LDS_DRBD:
8523
      # we change the snode then (otherwise we use the one passed in)
8524
      if dev.logical_id[0] == instance.primary_node:
8525
        snode = dev.logical_id[1]
8526
      else:
8527
        snode = dev.logical_id[0]
8528

    
8529
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8530
                                              instance.name, dev)
8531
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8532

    
8533
    if dev.children:
8534
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8535
                      for child in dev.children]
8536
    else:
8537
      dev_children = []
8538

    
8539
    data = {
8540
      "iv_name": dev.iv_name,
8541
      "dev_type": dev.dev_type,
8542
      "logical_id": dev.logical_id,
8543
      "physical_id": dev.physical_id,
8544
      "pstatus": dev_pstatus,
8545
      "sstatus": dev_sstatus,
8546
      "children": dev_children,
8547
      "mode": dev.mode,
8548
      "size": dev.size,
8549
      }
8550

    
8551
    return data
8552

    
8553
  def Exec(self, feedback_fn):
8554
    """Gather and return data"""
8555
    result = {}
8556

    
8557
    cluster = self.cfg.GetClusterInfo()
8558

    
8559
    for instance in self.wanted_instances:
8560
      if not self.op.static:
8561
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8562
                                                  instance.name,
8563
                                                  instance.hypervisor)
8564
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8565
        remote_info = remote_info.payload
8566
        if remote_info and "state" in remote_info:
8567
          remote_state = "up"
8568
        else:
8569
          remote_state = "down"
8570
      else:
8571
        remote_state = None
8572
      if instance.admin_up:
8573
        config_state = "up"
8574
      else:
8575
        config_state = "down"
8576

    
8577
      disks = [self._ComputeDiskStatus(instance, None, device)
8578
               for device in instance.disks]
8579

    
8580
      idict = {
8581
        "name": instance.name,
8582
        "config_state": config_state,
8583
        "run_state": remote_state,
8584
        "pnode": instance.primary_node,
8585
        "snodes": instance.secondary_nodes,
8586
        "os": instance.os,
8587
        # this happens to be the same format used for hooks
8588
        "nics": _NICListToTuple(self, instance.nics),
8589
        "disk_template": instance.disk_template,
8590
        "disks": disks,
8591
        "hypervisor": instance.hypervisor,
8592
        "network_port": instance.network_port,
8593
        "hv_instance": instance.hvparams,
8594
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8595
        "be_instance": instance.beparams,
8596
        "be_actual": cluster.FillBE(instance),
8597
        "os_instance": instance.osparams,
8598
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8599
        "serial_no": instance.serial_no,
8600
        "mtime": instance.mtime,
8601
        "ctime": instance.ctime,
8602
        "uuid": instance.uuid,
8603
        }
8604

    
8605
      result[instance.name] = idict
8606

    
8607
    return result
8608

    
8609

    
8610
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("nics", _EmptyList, _TList),
    ("disks", _EmptyList, _TList),
    ("beparams", _EmptyDict, _TDict),
    ("hvparams", _EmptyDict, _TDict),
    ("disk_template", None, _TMaybeString),
    ("remote_node", None, _TMaybeString),
    ("os_name", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ("osparams", None, _TOr(_TDict, _TNone)),
    _PForce,
    ]
  REQ_BGL = False

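  # The "nics" and "disks" lists are processed below as (operation, options)
  # pairs, e.g. (illustrative values):
  #   disks=[(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
  #   nics=[(0, {"ip": "192.0.2.10"})]
  # where an integer operation means "modify the item at that index".
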
  def CheckArguments(self):
8632
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8633
            self.op.hvparams or self.op.beparams or self.op.os_name):
8634
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8635

    
8636
    if self.op.hvparams:
8637
      _CheckGlobalHvParams(self.op.hvparams)
8638

    
8639
    # Disk validation
8640
    disk_addremove = 0
8641
    for disk_op, disk_dict in self.op.disks:
8642
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8643
      if disk_op == constants.DDM_REMOVE:
8644
        disk_addremove += 1
8645
        continue
8646
      elif disk_op == constants.DDM_ADD:
8647
        disk_addremove += 1
8648
      else:
8649
        if not isinstance(disk_op, int):
8650
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8651
        if not isinstance(disk_dict, dict):
8652
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8653
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8654

    
8655
      if disk_op == constants.DDM_ADD:
8656
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8657
        if mode not in constants.DISK_ACCESS_SET:
8658
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8659
                                     errors.ECODE_INVAL)
8660
        size = disk_dict.get('size', None)
8661
        if size is None:
8662
          raise errors.OpPrereqError("Required disk parameter size missing",
8663
                                     errors.ECODE_INVAL)
8664
        try:
8665
          size = int(size)
8666
        except (TypeError, ValueError), err:
8667
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8668
                                     str(err), errors.ECODE_INVAL)
8669
        disk_dict['size'] = size
8670
      else:
8671
        # modification of disk
8672
        if 'size' in disk_dict:
8673
          raise errors.OpPrereqError("Disk size change not possible, use"
8674
                                     " grow-disk", errors.ECODE_INVAL)
8675

    
8676
    if disk_addremove > 1:
8677
      raise errors.OpPrereqError("Only one disk add or remove operation"
8678
                                 " supported at a time", errors.ECODE_INVAL)
8679

    
8680
    if self.op.disks and self.op.disk_template is not None:
8681
      raise errors.OpPrereqError("Disk template conversion and other disk"
8682
                                 " changes not supported at the same time",
8683
                                 errors.ECODE_INVAL)
8684

    
8685
    if self.op.disk_template:
8686
      _CheckDiskTemplate(self.op.disk_template)
8687
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8688
          self.op.remote_node is None):
8689
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8690
                                   " one requires specifying a secondary node",
8691
                                   errors.ECODE_INVAL)
8692

    
8693
    # NIC validation
8694
    nic_addremove = 0
8695
    for nic_op, nic_dict in self.op.nics:
8696
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8697
      if nic_op == constants.DDM_REMOVE:
8698
        nic_addremove += 1
8699
        continue
8700
      elif nic_op == constants.DDM_ADD:
8701
        nic_addremove += 1
8702
      else:
8703
        if not isinstance(nic_op, int):
8704
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8705
        if not isinstance(nic_dict, dict):
8706
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8707
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8708

    
8709
      # nic_dict should be a dict
8710
      nic_ip = nic_dict.get('ip', None)
8711
      if nic_ip is not None:
8712
        if nic_ip.lower() == constants.VALUE_NONE:
8713
          nic_dict['ip'] = None
8714
        else:
8715
          if not netutils.IsValidIP4(nic_ip):
8716
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8717
                                       errors.ECODE_INVAL)
8718

    
8719
      nic_bridge = nic_dict.get('bridge', None)
8720
      nic_link = nic_dict.get('link', None)
8721
      if nic_bridge and nic_link:
8722
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8723
                                   " at the same time", errors.ECODE_INVAL)
8724
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8725
        nic_dict['bridge'] = None
8726
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8727
        nic_dict['link'] = None
8728

    
8729
      if nic_op == constants.DDM_ADD:
8730
        nic_mac = nic_dict.get('mac', None)
8731
        if nic_mac is None:
8732
          nic_dict['mac'] = constants.VALUE_AUTO
8733

    
8734
      if 'mac' in nic_dict:
8735
        nic_mac = nic_dict['mac']
8736
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8737
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8738

    
8739
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8740
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8741
                                     " modifying an existing nic",
8742
                                     errors.ECODE_INVAL)
8743

    
8744
    if nic_addremove > 1:
8745
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8746
                                 " supported at a time", errors.ECODE_INVAL)
8747

    
8748
  def ExpandNames(self):
8749
    self._ExpandAndLockInstance()
8750
    self.needed_locks[locking.LEVEL_NODE] = []
8751
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8752

    
8753
  def DeclareLocks(self, level):
8754
    if level == locking.LEVEL_NODE:
8755
      self._LockInstancesNodes()
8756
      if self.op.disk_template and self.op.remote_node:
8757
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8758
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8759

    
8760
  def BuildHooksEnv(self):
8761
    """Build hooks env.
8762

8763
    This runs on the master, primary and secondaries.
8764

8765
    """
8766
    args = dict()
8767
    if constants.BE_MEMORY in self.be_new:
8768
      args['memory'] = self.be_new[constants.BE_MEMORY]
8769
    if constants.BE_VCPUS in self.be_new:
8770
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8771
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8772
    # information at all.
8773
    if self.op.nics:
8774
      args['nics'] = []
8775
      nic_override = dict(self.op.nics)
8776
      for idx, nic in enumerate(self.instance.nics):
8777
        if idx in nic_override:
8778
          this_nic_override = nic_override[idx]
8779
        else:
8780
          this_nic_override = {}
8781
        if 'ip' in this_nic_override:
8782
          ip = this_nic_override['ip']
8783
        else:
8784
          ip = nic.ip
8785
        if 'mac' in this_nic_override:
8786
          mac = this_nic_override['mac']
8787
        else:
8788
          mac = nic.mac
8789
        if idx in self.nic_pnew:
8790
          nicparams = self.nic_pnew[idx]
8791
        else:
8792
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8793
        mode = nicparams[constants.NIC_MODE]
8794
        link = nicparams[constants.NIC_LINK]
8795
        args['nics'].append((ip, mac, mode, link))
8796
      if constants.DDM_ADD in nic_override:
8797
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8798
        mac = nic_override[constants.DDM_ADD]['mac']
8799
        nicparams = self.nic_pnew[constants.DDM_ADD]
8800
        mode = nicparams[constants.NIC_MODE]
8801
        link = nicparams[constants.NIC_LINK]
8802
        args['nics'].append((ip, mac, mode, link))
8803
      elif constants.DDM_REMOVE in nic_override:
8804
        del args['nics'][-1]
8805

    
8806
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8807
    if self.op.disk_template:
8808
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8809
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8810
    return env, nl, nl
8811

    
8812
  def CheckPrereq(self):
8813
    """Check prerequisites.
8814

8815
    This only checks the instance list against the existing names.
8816

8817
    """
8818
    # checking the new params on the primary/secondary nodes
8819

    
8820
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8821
    cluster = self.cluster = self.cfg.GetClusterInfo()
8822
    assert self.instance is not None, \
8823
      "Cannot retrieve locked instance %s" % self.op.instance_name
8824
    pnode = instance.primary_node
8825
    nodelist = list(instance.all_nodes)
8826

    
8827
    # OS change
8828
    if self.op.os_name and not self.op.force:
8829
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8830
                      self.op.force_variant)
8831
      instance_os = self.op.os_name
8832
    else:
8833
      instance_os = instance.os
8834

    
8835
    if self.op.disk_template:
8836
      if instance.disk_template == self.op.disk_template:
8837
        raise errors.OpPrereqError("Instance already has disk template %s" %
8838
                                   instance.disk_template, errors.ECODE_INVAL)
8839

    
8840
      if (instance.disk_template,
8841
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8842
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8843
                                   " %s to %s" % (instance.disk_template,
8844
                                                  self.op.disk_template),
8845
                                   errors.ECODE_INVAL)
8846
      _CheckInstanceDown(self, instance, "cannot change disk template")
8847
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8848
        if self.op.remote_node == pnode:
8849
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8850
                                     " as the primary node of the instance" %
8851
                                     self.op.remote_node, errors.ECODE_STATE)
8852
        _CheckNodeOnline(self, self.op.remote_node)
8853
        _CheckNodeNotDrained(self, self.op.remote_node)
8854
        disks = [{"size": d.size} for d in instance.disks]
8855
        required = _ComputeDiskSize(self.op.disk_template, disks)
8856
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8857

    
8858
    # hvparams processing
8859
    if self.op.hvparams:
8860
      hv_type = instance.hypervisor
8861
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8862
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8863
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8864

    
8865
      # local check
8866
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8867
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8868
      self.hv_new = hv_new # the new actual values
8869
      self.hv_inst = i_hvdict # the new dict (without defaults)
8870
    else:
8871
      self.hv_new = self.hv_inst = {}
8872

    
8873
    # beparams processing
8874
    if self.op.beparams:
8875
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8876
                                   use_none=True)
8877
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8878
      be_new = cluster.SimpleFillBE(i_bedict)
8879
      self.be_new = be_new # the new actual values
8880
      self.be_inst = i_bedict # the new dict (without defaults)
8881
    else:
8882
      self.be_new = self.be_inst = {}
8883

    
8884
    # osparams processing
8885
    if self.op.osparams:
8886
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8887
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8888
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8889
      self.os_inst = i_osdict # the new dict (without defaults)
8890
    else:
8891
      self.os_new = self.os_inst = {}
8892

    
8893
    self.warn = []
8894

    
8895
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8896
      mem_check_list = [pnode]
8897
      if be_new[constants.BE_AUTO_BALANCE]:
8898
        # either we changed auto_balance to yes or it was from before
8899
        mem_check_list.extend(instance.secondary_nodes)
8900
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8901
                                                  instance.hypervisor)
8902
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8903
                                         instance.hypervisor)
8904
      pninfo = nodeinfo[pnode]
8905
      msg = pninfo.fail_msg
8906
      if msg:
8907
        # Assume the primary node is unreachable and go ahead
8908
        self.warn.append("Can't get info from primary node %s: %s" %
8909
                         (pnode,  msg))
8910
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8911
        self.warn.append("Node data from primary node %s doesn't contain"
8912
                         " free memory information" % pnode)
8913
      elif instance_info.fail_msg:
8914
        self.warn.append("Can't get instance runtime information: %s" %
8915
                        instance_info.fail_msg)
8916
      else:
8917
        if instance_info.payload:
8918
          current_mem = int(instance_info.payload['memory'])
8919
        else:
8920
          # Assume instance not running
8921
          # (there is a slight race condition here, but it's not very probable,
8922
          # and we have no other way to check)
8923
          current_mem = 0
8924
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8925
                    pninfo.payload['memory_free'])
8926
        if miss_mem > 0:
8927
          raise errors.OpPrereqError("This change will prevent the instance"
8928
                                     " from starting, due to %d MB of memory"
8929
                                     " missing on its primary node" % miss_mem,
8930
                                     errors.ECODE_NORES)
8931

    
8932
      if be_new[constants.BE_AUTO_BALANCE]:
8933
        for node, nres in nodeinfo.items():
8934
          if node not in instance.secondary_nodes:
8935
            continue
8936
          msg = nres.fail_msg
8937
          if msg:
8938
            self.warn.append("Can't get info from secondary node %s: %s" %
8939
                             (node, msg))
8940
          elif not isinstance(nres.payload.get('memory_free', None), int):
8941
            self.warn.append("Secondary node %s didn't return free"
8942
                             " memory information" % node)
8943
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8944
            self.warn.append("Not enough memory to failover instance to"
8945
                             " secondary node %s" % node)
8946

    
8947
    # NIC processing
8948
    self.nic_pnew = {}
8949
    self.nic_pinst = {}
8950
    for nic_op, nic_dict in self.op.nics:
8951
      if nic_op == constants.DDM_REMOVE:
8952
        if not instance.nics:
8953
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8954
                                     errors.ECODE_INVAL)
8955
        continue
8956
      if nic_op != constants.DDM_ADD:
8957
        # an existing nic
8958
        if not instance.nics:
8959
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8960
                                     " no NICs" % nic_op,
8961
                                     errors.ECODE_INVAL)
8962
        if nic_op < 0 or nic_op >= len(instance.nics):
8963
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8964
                                     " are 0 to %d" %
8965
                                     (nic_op, len(instance.nics) - 1),
8966
                                     errors.ECODE_INVAL)
8967
        old_nic_params = instance.nics[nic_op].nicparams
8968
        old_nic_ip = instance.nics[nic_op].ip
8969
      else:
8970
        old_nic_params = {}
8971
        old_nic_ip = None
8972

    
8973
      update_params_dict = dict([(key, nic_dict[key])
8974
                                 for key in constants.NICS_PARAMETERS
8975
                                 if key in nic_dict])
8976

    
8977
      if 'bridge' in nic_dict:
8978
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8979

    
8980
      new_nic_params = _GetUpdatedParams(old_nic_params,
8981
                                         update_params_dict)
8982
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8983
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8984
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8985
      self.nic_pinst[nic_op] = new_nic_params
8986
      self.nic_pnew[nic_op] = new_filled_nic_params
8987
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8988

    
8989
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8990
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8991
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8992
        if msg:
8993
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8994
          if self.op.force:
8995
            self.warn.append(msg)
8996
          else:
8997
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8998
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8999
        if 'ip' in nic_dict:
9000
          nic_ip = nic_dict['ip']
9001
        else:
9002
          nic_ip = old_nic_ip
9003
        if nic_ip is None:
9004
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9005
                                     ' on a routed nic', errors.ECODE_INVAL)
9006
      if 'mac' in nic_dict:
9007
        nic_mac = nic_dict['mac']
9008
        if nic_mac is None:
9009
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9010
                                     errors.ECODE_INVAL)
9011
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9012
          # otherwise generate the mac
9013
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9014
        else:
9015
          # or validate/reserve the current one
9016
          try:
9017
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9018
          except errors.ReservationError:
9019
            raise errors.OpPrereqError("MAC address %s already in use"
9020
                                       " in cluster" % nic_mac,
9021
                                       errors.ECODE_NOTUNIQUE)
9022

    
9023
    # DISK processing
9024
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9025
      raise errors.OpPrereqError("Disk operations not supported for"
9026
                                 " diskless instances",
9027
                                 errors.ECODE_INVAL)
9028
    for disk_op, _ in self.op.disks:
9029
      if disk_op == constants.DDM_REMOVE:
9030
        if len(instance.disks) == 1:
9031
          raise errors.OpPrereqError("Cannot remove the last disk of"
9032
                                     " an instance", errors.ECODE_INVAL)
9033
        _CheckInstanceDown(self, instance, "cannot remove disks")
9034

    
9035
      if (disk_op == constants.DDM_ADD and
9036
          len(instance.nics) >= constants.MAX_DISKS):
9037
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9038
                                   " add more" % constants.MAX_DISKS,
9039
                                   errors.ECODE_STATE)
9040
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9041
        # an existing disk
9042
        if disk_op < 0 or disk_op >= len(instance.disks):
9043
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9044
                                     " are 0 to %d" %
9045
                                     (disk_op, len(instance.disks)),
9046
                                     errors.ECODE_INVAL)
9047

    
9048
    return
9049

    
9050
  def _ConvertPlainToDrbd(self, feedback_fn):
9051
    """Converts an instance from plain to drbd.
9052

9053
    """
9054
    feedback_fn("Converting template to drbd")
9055
    instance = self.instance
9056
    pnode = instance.primary_node
9057
    snode = self.op.remote_node
9058

    
9059
    # create a fake disk info for _GenerateDiskTemplate
9060
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
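    # Illustrative example: for an instance with a single 10 GiB read-write
    # disk this yields [{"size": 10240, "mode": "rw"}], which is the minimal
    # per-disk information _GenerateDiskTemplate needs here.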
9061
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9062
                                      instance.name, pnode, [snode],
9063
                                      disk_info, None, None, 0)
9064
    info = _GetInstanceInfoText(instance)
9065
    feedback_fn("Creating aditional volumes...")
9066
    # first, create the missing data and meta devices
9067
    for disk in new_disks:
9068
      # unfortunately this is... not too nice
9069
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9070
                            info, True)
9071
      for child in disk.children:
9072
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9073
    # at this stage, all new LVs have been created, we can rename the
9074
    # old ones
9075
    feedback_fn("Renaming original volumes...")
9076
    rename_list = [(o, n.children[0].logical_id)
9077
                   for (o, n) in zip(instance.disks, new_disks)]
9078
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9079
    result.Raise("Failed to rename original LVs")
9080

    
9081
    feedback_fn("Initializing DRBD devices...")
9082
    # all child devices are in place, we can now create the DRBD devices
9083
    for disk in new_disks:
9084
      for node in [pnode, snode]:
9085
        f_create = node == pnode
9086
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9087

    
9088
    # at this point, the instance has been modified
9089
    instance.disk_template = constants.DT_DRBD8
9090
    instance.disks = new_disks
9091
    self.cfg.Update(instance, feedback_fn)
9092

    
9093
    # disks are created, waiting for sync
9094
    disk_abort = not _WaitForSync(self, instance)
9095
    if disk_abort:
9096
      raise errors.OpExecError("There are some degraded disks for"
9097
                               " this instance, please cleanup manually")
9098

    
9099
  def _ConvertDrbdToPlain(self, feedback_fn):
9100
    """Converts an instance from drbd to plain.
9101

9102
    """
9103
    instance = self.instance
9104
    assert len(instance.secondary_nodes) == 1
9105
    pnode = instance.primary_node
9106
    snode = instance.secondary_nodes[0]
9107
    feedback_fn("Converting template to plain")
9108

    
9109
    old_disks = instance.disks
9110
    new_disks = [d.children[0] for d in old_disks]
9111

    
9112
    # copy over size and mode
9113
    for parent, child in zip(old_disks, new_disks):
9114
      child.size = parent.size
9115
      child.mode = parent.mode
9116

    
9117
    # update instance structure
9118
    instance.disks = new_disks
9119
    instance.disk_template = constants.DT_PLAIN
9120
    self.cfg.Update(instance, feedback_fn)
9121

    
9122
    feedback_fn("Removing volumes on the secondary node...")
9123
    for disk in old_disks:
9124
      self.cfg.SetDiskID(disk, snode)
9125
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9126
      if msg:
9127
        self.LogWarning("Could not remove block device %s on node %s,"
9128
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9129

    
9130
    feedback_fn("Removing unneeded volumes on the primary node...")
9131
    for idx, disk in enumerate(old_disks):
9132
      meta = disk.children[1]
9133
      self.cfg.SetDiskID(meta, pnode)
9134
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9135
      if msg:
9136
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9137
                        " continuing anyway: %s", idx, pnode, msg)
9138

    
9139

    
9140
  def Exec(self, feedback_fn):
9141
    """Modifies an instance.
9142

9143
    All parameters take effect only at the next restart of the instance.
9144

9145
    """
9146
    # Process here the warnings from CheckPrereq, as we don't have a
9147
    # feedback_fn there.
9148
    for warn in self.warn:
9149
      feedback_fn("WARNING: %s" % warn)
9150

    
9151
    result = []
9152
    instance = self.instance
9153
    # disk changes
9154
    for disk_op, disk_dict in self.op.disks:
9155
      if disk_op == constants.DDM_REMOVE:
9156
        # remove the last disk
9157
        device = instance.disks.pop()
9158
        device_idx = len(instance.disks)
9159
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9160
          self.cfg.SetDiskID(disk, node)
9161
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9162
          if msg:
9163
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9164
                            " continuing anyway", device_idx, node, msg)
9165
        result.append(("disk/%d" % device_idx, "remove"))
9166
      elif disk_op == constants.DDM_ADD:
9167
        # add a new disk
9168
        if instance.disk_template == constants.DT_FILE:
9169
          file_driver, file_path = instance.disks[0].logical_id
9170
          file_path = os.path.dirname(file_path)
9171
        else:
9172
          file_driver = file_path = None
9173
        disk_idx_base = len(instance.disks)
9174
        new_disk = _GenerateDiskTemplate(self,
9175
                                         instance.disk_template,
9176
                                         instance.name, instance.primary_node,
9177
                                         instance.secondary_nodes,
9178
                                         [disk_dict],
9179
                                         file_path,
9180
                                         file_driver,
9181
                                         disk_idx_base)[0]
9182
        instance.disks.append(new_disk)
9183
        info = _GetInstanceInfoText(instance)
9184

    
9185
        logging.info("Creating volume %s for instance %s",
9186
                     new_disk.iv_name, instance.name)
9187
        # Note: this needs to be kept in sync with _CreateDisks
9188
        #HARDCODE
9189
        for node in instance.all_nodes:
9190
          f_create = node == instance.primary_node
9191
          try:
9192
            _CreateBlockDev(self, node, instance, new_disk,
9193
                            f_create, info, f_create)
9194
          except errors.OpExecError, err:
9195
            self.LogWarning("Failed to create volume %s (%s) on"
9196
                            " node %s: %s",
9197
                            new_disk.iv_name, new_disk, node, err)
9198
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9199
                       (new_disk.size, new_disk.mode)))
9200
      else:
9201
        # change a given disk
9202
        instance.disks[disk_op].mode = disk_dict['mode']
9203
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9204

    
9205
    if self.op.disk_template:
9206
      r_shut = _ShutdownInstanceDisks(self, instance)
9207
      if not r_shut:
9208
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9209
                                 " proceed with disk template conversion")
9210
      mode = (instance.disk_template, self.op.disk_template)
9211
      try:
9212
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9213
      except:
9214
        self.cfg.ReleaseDRBDMinors(instance.name)
9215
        raise
9216
      result.append(("disk_template", self.op.disk_template))
9217

    
9218
    # NIC changes
9219
    for nic_op, nic_dict in self.op.nics:
9220
      if nic_op == constants.DDM_REMOVE:
9221
        # remove the last nic
9222
        del instance.nics[-1]
9223
        result.append(("nic.%d" % len(instance.nics), "remove"))
9224
      elif nic_op == constants.DDM_ADD:
9225
        # mac and bridge should be set, by now
9226
        mac = nic_dict['mac']
9227
        ip = nic_dict.get('ip', None)
9228
        nicparams = self.nic_pinst[constants.DDM_ADD]
9229
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9230
        instance.nics.append(new_nic)
9231
        result.append(("nic.%d" % (len(instance.nics) - 1),
9232
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9233
                       (new_nic.mac, new_nic.ip,
9234
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9235
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9236
                       )))
9237
      else:
9238
        for key in 'mac', 'ip':
9239
          if key in nic_dict:
9240
            setattr(instance.nics[nic_op], key, nic_dict[key])
9241
        if nic_op in self.nic_pinst:
9242
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9243
        for key, val in nic_dict.iteritems():
9244
          result.append(("nic.%s/%d" % (key, nic_op), val))
9245

    
9246
    # hvparams changes
9247
    if self.op.hvparams:
9248
      instance.hvparams = self.hv_inst
9249
      for key, val in self.op.hvparams.iteritems():
9250
        result.append(("hv/%s" % key, val))
9251

    
9252
    # beparams changes
9253
    if self.op.beparams:
9254
      instance.beparams = self.be_inst
9255
      for key, val in self.op.beparams.iteritems():
9256
        result.append(("be/%s" % key, val))
9257

    
9258
    # OS change
9259
    if self.op.os_name:
9260
      instance.os = self.op.os_name
9261

    
9262
    # osparams changes
9263
    if self.op.osparams:
9264
      instance.osparams = self.os_inst
9265
      for key, val in self.op.osparams.iteritems():
9266
        result.append(("os/%s" % key, val))
9267

    
9268
    self.cfg.Update(instance, feedback_fn)
9269

    
9270
    return result
9271

    
9272
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
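  # Dispatch table for Exec() above: keys are (old_template, new_template)
  # pairs and values are the conversion methods, so only plain<->drbd8
  # conversions are currently supported.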
9276

    
9277

    
9278
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
9289
    self.needed_locks = {}
9290
    self.share_locks[locking.LEVEL_NODE] = 1
9291
    if not self.op.nodes:
9292
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9293
    else:
9294
      self.needed_locks[locking.LEVEL_NODE] = \
9295
        _GetWantedNodes(self, self.op.nodes)
9296

    
9297
  def Exec(self, feedback_fn):
9298
    """Compute the list of all the exported system images.
9299

9300
    @rtype: dict
9301
    @return: a dictionary with the structure node->(export-list)
9302
        where export-list is a list of the instances exported on
9303
        that node.
9304

9305
    """
9306
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9307
    rpcresult = self.rpc.call_export_list(self.nodes)
9308
    result = {}
9309
    for node in rpcresult:
9310
      if rpcresult[node].fail_msg:
9311
        result[node] = False
9312
      else:
9313
        result[node] = rpcresult[node].payload
9314

    
9315
    return result
9316

    
9317

    
9318
class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

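  # For remote exports, Exec() below returns a dictionary roughly of the
  # form (illustrative):
  #   {"handshake": ..., "x509_key_name": (name, hmac, salt), "x509_ca": ...}
  # while for local exports it simply returns None.
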
  def ExpandNames(self):
9329
    self._ExpandAndLockInstance()
9330

    
9331
  def CheckPrereq(self):
9332
    """Check prerequisites.
9333

9334
    """
9335
    instance_name = self.op.instance_name
9336

    
9337
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9338
    assert self.instance is not None, \
9339
          "Cannot retrieve locked instance %s" % self.op.instance_name
9340
    _CheckNodeOnline(self, self.instance.primary_node)
9341

    
9342
    self._cds = _GetClusterDomainSecret()
9343

    
9344
  def Exec(self, feedback_fn):
9345
    """Prepares an instance for an export.
9346

9347
    """
9348
    instance = self.instance
9349

    
9350
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9351
      salt = utils.GenerateSecret(8)
9352

    
9353
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9354
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9355
                                              constants.RIE_CERT_VALIDITY)
9356
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9357

    
9358
      (name, cert_pem) = result.payload
9359

    
9360
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9361
                                             cert_pem)
9362

    
9363
      return {
9364
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9365
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9366
                          salt),
9367
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9368
        }
9369

    
9370
    return None
9371

    
9372

    
9373
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
    ("shutdown", True, _TBool),
    _PShutdownTimeout,
    ("remove_instance", False, _TBool),
    ("ignore_remove_failures", False, _TBool),
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
    ("x509_key_name", None, _TOr(_TList, _TNone)),
    ("destination_x509_ca", None, _TMaybeString),
    ]
  REQ_BGL = False

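  # Note on "target_node": for a local export it is a single node name
  # (e.g. "node2.example.com"), while for a remote export it is a list with
  # one encoded (host, port, magic) entry per instance disk, which is why
  # both a string and a list are accepted above.
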
  def CheckArguments(self):
9393
    """Check the arguments.
9394

9395
    """
9396
    self.x509_key_name = self.op.x509_key_name
9397
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9398

    
9399
    if self.op.remove_instance and not self.op.shutdown:
9400
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9401
                                 " down before")
9402

    
9403
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9404
      if not self.x509_key_name:
9405
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9406
                                   errors.ECODE_INVAL)
9407

    
9408
      if not self.dest_x509_ca_pem:
9409
        raise errors.OpPrereqError("Missing destination X509 CA",
9410
                                   errors.ECODE_INVAL)
9411

    
9412
  def ExpandNames(self):
9413
    self._ExpandAndLockInstance()
9414

    
9415
    # Lock all nodes for local exports
9416
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9417
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
9425
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9426

    
9427
  def DeclareLocks(self, level):
9428
    """Last minute lock declaration."""
9429
    # All nodes are locked anyway, so nothing to do here.
9430

    
9431
  def BuildHooksEnv(self):
9432
    """Build hooks env.
9433

9434
    This will run on the master, primary node and target node.
9435

9436
    """
9437
    env = {
9438
      "EXPORT_MODE": self.op.mode,
9439
      "EXPORT_NODE": self.op.target_node,
9440
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9441
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9442
      # TODO: Generic function for boolean env variables
9443
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9444
      }
9445

    
9446
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9447

    
9448
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9449

    
9450
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9451
      nl.append(self.op.target_node)
9452

    
9453
    return env, nl, nl
9454

    
9455
  def CheckPrereq(self):
9456
    """Check prerequisites.
9457

9458
    This checks that the instance and node names are valid.
9459

9460
    """
9461
    instance_name = self.op.instance_name
9462

    
9463
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9464
    assert self.instance is not None, \
9465
          "Cannot retrieve locked instance %s" % self.op.instance_name
9466
    _CheckNodeOnline(self, self.instance.primary_node)
9467

    
9468
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9469
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9470
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9471
      assert self.dst_node is not None
9472

    
9473
      _CheckNodeOnline(self, self.dst_node.name)
9474
      _CheckNodeNotDrained(self, self.dst_node.name)
9475

    
9476
      self._cds = None
9477
      self.dest_disk_info = None
9478
      self.dest_x509_ca = None
9479

    
9480
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9481
      self.dst_node = None
9482

    
9483
      if len(self.op.target_node) != len(self.instance.disks):
9484
        raise errors.OpPrereqError(("Received destination information for %s"
9485
                                    " disks, but instance %s has %s disks") %
9486
                                   (len(self.op.target_node), instance_name,
9487
                                    len(self.instance.disks)),
9488
                                   errors.ECODE_INVAL)
9489

    
9490
      cds = _GetClusterDomainSecret()
9491

    
9492
      # Check X509 key name
9493
      try:
9494
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9495
      except (TypeError, ValueError), err:
9496
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9497

    
9498
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9499
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9500
                                   errors.ECODE_INVAL)
9501

    
9502
      # Load and verify CA
9503
      try:
9504
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9505
      except OpenSSL.crypto.Error, err:
9506
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9507
                                   (err, ), errors.ECODE_INVAL)
9508

    
9509
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9510
      if errcode is not None:
9511
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9512
                                   (msg, ), errors.ECODE_INVAL)
9513

    
9514
      self.dest_x509_ca = cert
9515

    
9516
      # Verify target information
9517
      disk_info = []
9518
      for idx, disk_data in enumerate(self.op.target_node):
9519
        try:
9520
          (host, port, magic) = \
9521
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9522
        except errors.GenericError, err:
9523
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9524
                                     (idx, err), errors.ECODE_INVAL)
9525

    
9526
        disk_info.append((host, port, magic))
9527

    
9528
      assert len(disk_info) == len(self.op.target_node)
9529
      self.dest_disk_info = disk_info
9530

    
9531
    else:
9532
      raise errors.ProgrammerError("Unhandled export mode %r" %
9533
                                   self.op.mode)
9534

    
9535
    # instance disk type verification
9536
    # TODO: Implement export support for file-based disks
9537
    for disk in self.instance.disks:
9538
      if disk.dev_type == constants.LD_FILE:
9539
        raise errors.OpPrereqError("Export not supported for instances with"
9540
                                   " file-based disks", errors.ECODE_INVAL)
9541

    
9542
  def _CleanupExports(self, feedback_fn):
9543
    """Removes exports of current instance from all other nodes.
9544

9545
    If an instance in a cluster with nodes A..D was exported to node C, its
9546
    exports will be removed from the nodes A, B and D.
9547

9548
    """
9549
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9550

    
9551
    nodelist = self.cfg.GetNodeList()
9552
    nodelist.remove(self.dst_node.name)
9553

    
9554
    # on one-node clusters nodelist will be empty after the removal
9555
    # if we proceed the backup would be removed because OpQueryExports
9556
    # substitutes an empty list with the full cluster node list.
9557
    iname = self.instance.name
9558
    if nodelist:
9559
      feedback_fn("Removing old exports for instance %s" % iname)
9560
      exportlist = self.rpc.call_export_list(nodelist)
9561
      for node in exportlist:
9562
        if exportlist[node].fail_msg:
9563
          continue
9564
        if iname in exportlist[node].payload:
9565
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9566
          if msg:
9567
            self.LogWarning("Could not remove older export for instance %s"
9568
                            " on node %s: %s", iname, node, msg)
9569

    
9570
  def Exec(self, feedback_fn):
9571
    """Export an instance to an image in the cluster.
9572

9573
    """
9574
    assert self.op.mode in constants.EXPORT_MODES
9575

    
9576
    instance = self.instance
9577
    src_node = instance.primary_node
9578

    
9579
    if self.op.shutdown:
9580
      # shutdown the instance, but not the disks
9581
      feedback_fn("Shutting down instance %s" % instance.name)
9582
      result = self.rpc.call_instance_shutdown(src_node, instance,
9583
                                               self.op.shutdown_timeout)
9584
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9585
      result.Raise("Could not shutdown instance %s on"
9586
                   " node %s" % (instance.name, src_node))
9587

    
9588
    # set the disks ID correctly since call_instance_start needs the
9589
    # correct drbd minor to create the symlinks
9590
    for disk in instance.disks:
9591
      self.cfg.SetDiskID(disk, src_node)
9592

    
9593
    activate_disks = (not instance.admin_up)
9594

    
9595
    if activate_disks:
9596
      # Activate the instance disks if we're exporting a stopped instance
9597
      feedback_fn("Activating disks for %s" % instance.name)
9598
      _StartInstanceDisks(self, instance, None)
9599

    
9600
    try:
9601
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9602
                                                     instance)
9603

    
9604
      helper.CreateSnapshots()
9605
      try:
9606
        if (self.op.shutdown and instance.admin_up and
9607
            not self.op.remove_instance):
9608
          assert not activate_disks
9609
          feedback_fn("Starting instance %s" % instance.name)
9610
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9611
          msg = result.fail_msg
9612
          if msg:
9613
            feedback_fn("Failed to start instance: %s" % msg)
9614
            _ShutdownInstanceDisks(self, instance)
9615
            raise errors.OpExecError("Could not start instance: %s" % msg)
9616

    
9617
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9618
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9619
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9620
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9621
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9622

    
9623
          (key_name, _, _) = self.x509_key_name
9624

    
9625
          dest_ca_pem = \
9626
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9627
                                            self.dest_x509_ca)
9628

    
9629
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9630
                                                     key_name, dest_ca_pem,
9631
                                                     timeouts)
9632
      finally:
9633
        helper.Cleanup()
9634

    
9635
      # Check for backwards compatibility
9636
      assert len(dresults) == len(instance.disks)
9637
      assert compat.all(isinstance(i, bool) for i in dresults), \
9638
             "Not all results are boolean: %r" % dresults
9639

    
9640
    finally:
9641
      if activate_disks:
9642
        feedback_fn("Deactivating disks for %s" % instance.name)
9643
        _ShutdownInstanceDisks(self, instance)
9644

    
9645
    if not (compat.all(dresults) and fin_resu):
9646
      failures = []
9647
      if not fin_resu:
9648
        failures.append("export finalization")
9649
      if not compat.all(dresults):
9650
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9651
                               if not dsk)
9652
        failures.append("disk export: disk(s) %s" % fdsk)
9653

    
9654
      raise errors.OpExecError("Export failed, errors in %s" %
9655
                               utils.CommaJoin(failures))
9656

    
9657
    # At this point the export was successful; we can clean up and finish
9658

    
9659
    # Remove instance if requested
9660
    if self.op.remove_instance:
9661
      feedback_fn("Removing instance %s" % instance.name)
9662
      _RemoveInstance(self, feedback_fn, instance,
9663
                      self.op.ignore_remove_failures)
9664

    
9665
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9666
      self._CleanupExports(feedback_fn)
9667

    
9668
    return fin_resu, dresults
9669

    
9670

    
9671
class LURemoveExport(NoHooksLU):
9672
  """Remove exports related to the named instance.
9673

9674
  """
9675
  _OP_PARAMS = [
9676
    _PInstanceName,
9677
    ]
9678
  REQ_BGL = False
9679

    
9680
  def ExpandNames(self):
9681
    self.needed_locks = {}
9682
    # We need all nodes to be locked in order for RemoveExport to work, but we
9683
    # don't need to lock the instance itself, as nothing will happen to it (and
9684
    # we can also remove exports for a removed instance)
9685
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9686

    
9687
  def Exec(self, feedback_fn):
9688
    """Remove any export.
9689

9690
    """
9691
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9692
    # If the instance was not found we'll try with the name that was passed in.
9693
    # This will only work if it was an FQDN, though.
9694
    fqdn_warn = False
9695
    if not instance_name:
9696
      fqdn_warn = True
9697
      instance_name = self.op.instance_name
9698

    
9699
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9700
    exportlist = self.rpc.call_export_list(locked_nodes)
9701
    found = False
9702
    for node in exportlist:
9703
      msg = exportlist[node].fail_msg
9704
      if msg:
9705
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9706
        continue
9707
      if instance_name in exportlist[node].payload:
9708
        found = True
9709
        result = self.rpc.call_export_remove(node, instance_name)
9710
        msg = result.fail_msg
9711
        if msg:
9712
          logging.error("Could not remove export for instance %s"
9713
                        " on node %s: %s", instance_name, node, msg)
9714

    
9715
    if fqdn_warn and not found:
9716
      feedback_fn("Export not found. If trying to remove an export belonging"
9717
                  " to a deleted instance please use its Fully Qualified"
9718
                  " Domain Name.")
9719

    
9720

    
9721
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9722
  """Generic tags LU.
9723

9724
  This is an abstract class which is the parent of all the other tags LUs.
9725

9726
  """
9727

    
9728
  def ExpandNames(self):
9729
    self.needed_locks = {}
9730
    if self.op.kind == constants.TAG_NODE:
9731
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9732
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9733
    elif self.op.kind == constants.TAG_INSTANCE:
9734
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9735
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9736

    
9737
  def CheckPrereq(self):
9738
    """Check prerequisites.
9739

9740
    """
9741
    if self.op.kind == constants.TAG_CLUSTER:
9742
      self.target = self.cfg.GetClusterInfo()
9743
    elif self.op.kind == constants.TAG_NODE:
9744
      self.target = self.cfg.GetNodeInfo(self.op.name)
9745
    elif self.op.kind == constants.TAG_INSTANCE:
9746
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9747
    else:
9748
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9749
                                 str(self.op.kind), errors.ECODE_INVAL)
9750

    
9751

    
9752
class LUGetTags(TagsLU):
9753
  """Returns the tags of a given object.
9754

9755
  """
9756
  _OP_PARAMS = [
9757
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9758
    # Name is only meaningful for nodes and instances
9759
    ("name", _NoDefault, _TMaybeString),
9760
    ]
9761
  REQ_BGL = False
9762

    
9763
  def Exec(self, feedback_fn):
9764
    """Returns the tag list.
9765

9766
    """
9767
    return list(self.target.GetTags())
9768

    
9769

    
9770
class LUSearchTags(NoHooksLU):
9771
  """Searches the tags for a given pattern.
9772

9773
  """
9774
  _OP_PARAMS = [
9775
    ("pattern", _NoDefault, _TNonEmptyString),
9776
    ]
9777
  REQ_BGL = False
9778

    
9779
  def ExpandNames(self):
9780
    self.needed_locks = {}
9781

    
9782
  def CheckPrereq(self):
9783
    """Check prerequisites.
9784

9785
    This checks the pattern passed for validity by compiling it.
9786

9787
    """
9788
    try:
9789
      self.re = re.compile(self.op.pattern)
9790
    except re.error, err:
9791
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9792
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9793

    
9794
  def Exec(self, feedback_fn):
9795
    """Returns the tag list.
9796

9797
    """
9798
    cfg = self.cfg
9799
    tgts = [("/cluster", cfg.GetClusterInfo())]
9800
    ilist = cfg.GetAllInstancesInfo().values()
9801
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9802
    nlist = cfg.GetAllNodesInfo().values()
9803
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9804
    results = []
9805
    for path, target in tgts:
9806
      for tag in target.GetTags():
9807
        if self.re.search(tag):
9808
          results.append((path, tag))
9809
    return results
9810

    
9811

    
9812
class LUAddTags(TagsLU):
9813
  """Sets a tag on a given object.
9814

9815
  """
9816
  _OP_PARAMS = [
9817
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9818
    # Name is only meaningful for nodes and instances
9819
    ("name", _NoDefault, _TMaybeString),
9820
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9821
    ]
9822
  REQ_BGL = False
9823

    
9824
  def CheckPrereq(self):
9825
    """Check prerequisites.
9826

9827
    This checks the type and length of the tag name and value.
9828

9829
    """
9830
    TagsLU.CheckPrereq(self)
9831
    for tag in self.op.tags:
9832
      objects.TaggableObject.ValidateTag(tag)
9833

    
9834
  def Exec(self, feedback_fn):
9835
    """Sets the tag.
9836

9837
    """
9838
    try:
9839
      for tag in self.op.tags:
9840
        self.target.AddTag(tag)
9841
    except errors.TagError, err:
9842
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9843
    self.cfg.Update(self.target, feedback_fn)
9844

    
9845

    
9846
class LUDelTags(TagsLU):
9847
  """Delete a list of tags from a given object.
9848

9849
  """
9850
  _OP_PARAMS = [
9851
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9852
    # Name is only meaningful for nodes and instances
9853
    ("name", _NoDefault, _TMaybeString),
9854
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9855
    ]
9856
  REQ_BGL = False
9857

    
9858
  def CheckPrereq(self):
9859
    """Check prerequisites.
9860

9861
    This checks that we have the given tag.
9862

9863
    """
9864
    TagsLU.CheckPrereq(self)
9865
    for tag in self.op.tags:
9866
      objects.TaggableObject.ValidateTag(tag)
9867
    del_tags = frozenset(self.op.tags)
9868
    cur_tags = self.target.GetTags()
9869
    if not del_tags <= cur_tags:
9870
      diff_tags = del_tags - cur_tags
9871
      diff_names = ["'%s'" % tag for tag in diff_tags]
9872
      diff_names.sort()
9873
      raise errors.OpPrereqError("Tag(s) %s not found" %
9874
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9875

    
9876
  def Exec(self, feedback_fn):
9877
    """Remove the tag from the object.
9878

9879
    """
9880
    for tag in self.op.tags:
9881
      self.target.RemoveTag(tag)
9882
    self.cfg.Update(self.target, feedback_fn)
9883

    
9884

    
9885
class LUTestDelay(NoHooksLU):
9886
  """Sleep for a specified amount of time.
9887

9888
  This LU sleeps on the master and/or nodes for a specified amount of
9889
  time.
9890

9891
  """
9892
  _OP_PARAMS = [
9893
    ("duration", _NoDefault, _TFloat),
9894
    ("on_master", True, _TBool),
9895
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9896
    ("repeat", 0, _TPositiveInt)
9897
    ]
9898
  REQ_BGL = False
9899

    
9900
  def ExpandNames(self):
9901
    """Expand names and set required locks.
9902

9903
    This expands the node list, if any.
9904

9905
    """
9906
    self.needed_locks = {}
9907
    if self.op.on_nodes:
9908
      # _GetWantedNodes can be used here, but is not always appropriate to use
9909
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9910
      # more information.
9911
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9912
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9913

    
9914
  def _TestDelay(self):
9915
    """Do the actual sleep.
9916

9917
    """
9918
    if self.op.on_master:
9919
      if not utils.TestDelay(self.op.duration):
9920
        raise errors.OpExecError("Error during master delay test")
9921
    if self.op.on_nodes:
9922
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9923
      for node, node_result in result.items():
9924
        node_result.Raise("Failure during rpc call to node %s" % node)
9925

    
9926
  def Exec(self, feedback_fn):
9927
    """Execute the test delay opcode, with the wanted repetitions.
9928

9929
    """
9930
    if self.op.repeat == 0:
9931
      self._TestDelay()
9932
    else:
9933
      top_value = self.op.repeat - 1
9934
      for i in range(self.op.repeat):
9935
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9936
        self._TestDelay()
9937

    
9938

    
9939
class LUTestJobqueue(NoHooksLU):
9940
  """Utility LU to test some aspects of the job queue.
9941

9942
  """
9943
  _OP_PARAMS = [
9944
    ("notify_waitlock", False, _TBool),
9945
    ("notify_exec", False, _TBool),
9946
    ("log_messages", _EmptyList, _TListOf(_TString)),
9947
    ("fail", False, _TBool),
9948
    ]
9949
  REQ_BGL = False
9950

    
9951
  # Must be lower than default timeout for WaitForJobChange to see whether it
9952
  # notices changed jobs
9953
  _CLIENT_CONNECT_TIMEOUT = 20.0
9954
  _CLIENT_CONFIRM_TIMEOUT = 60.0
9955

    
9956
  @classmethod
9957
  def _NotifyUsingSocket(cls, cb, errcls):
9958
    """Opens a Unix socket and waits for another program to connect.
9959

9960
    @type cb: callable
9961
    @param cb: Callback to send socket name to client
9962
    @type errcls: class
9963
    @param errcls: Exception class to use for errors
9964

9965
    """
9966
    # Using a temporary directory as there's no easy way to create temporary
9967
    # sockets without writing a custom loop around tempfile.mktemp and
9968
    # socket.bind
9969
    tmpdir = tempfile.mkdtemp()
9970
    try:
9971
      tmpsock = utils.PathJoin(tmpdir, "sock")
9972

    
9973
      logging.debug("Creating temporary socket at %s", tmpsock)
9974
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
9975
      try:
9976
        sock.bind(tmpsock)
9977
        sock.listen(1)
9978

    
9979
        # Send details to client
9980
        cb(tmpsock)
9981

    
9982
        # Wait for client to connect before continuing
9983
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
9984
        try:
9985
          (conn, _) = sock.accept()
9986
        except socket.error, err:
9987
          raise errcls("Client didn't connect in time (%s)" % err)
9988
      finally:
9989
        sock.close()
9990
    finally:
9991
      # Remove as soon as client is connected
9992
      shutil.rmtree(tmpdir)
9993

    
9994
    # Wait for client to close
9995
    try:
9996
      try:
9997
        # pylint: disable-msg=E1101
9998
        # Instance of '_socketobject' has no ... member
9999
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10000
        conn.recv(1)
10001
      except socket.error, err:
10002
        raise errcls("Client failed to confirm notification (%s)" % err)
10003
    finally:
10004
      conn.close()
10005

    
10006
  def _SendNotification(self, test, arg, sockname):
10007
    """Sends a notification to the client.
10008

10009
    @type test: string
10010
    @param test: Test name
10011
    @param arg: Test argument (depends on test)
10012
    @type sockname: string
10013
    @param sockname: Socket path
10014

10015
    """
10016
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10017

    
10018
  def _Notify(self, prereq, test, arg):
10019
    """Notifies the client of a test.
10020

10021
    @type prereq: bool
10022
    @param prereq: Whether this is a prereq-phase test
10023
    @type test: string
10024
    @param test: Test name
10025
    @param arg: Test argument (depends on test)
10026

10027
    """
10028
    if prereq:
10029
      errcls = errors.OpPrereqError
10030
    else:
10031
      errcls = errors.OpExecError
10032

    
10033
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10034
                                                  test, arg),
10035
                                   errcls)
10036

    
10037
  def CheckArguments(self):
10038
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10039
    self.expandnames_calls = 0
10040

    
10041
  def ExpandNames(self):
10042
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10043
    if checkargs_calls < 1:
10044
      raise errors.ProgrammerError("CheckArguments was not called")
10045

    
10046
    self.expandnames_calls += 1
10047

    
10048
    if self.op.notify_waitlock:
10049
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10050

    
10051
    self.LogInfo("Expanding names")
10052

    
10053
    # Get lock on master node (just to get a lock, not for a particular reason)
10054
    self.needed_locks = {
10055
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10056
      }
10057

    
10058
  def Exec(self, feedback_fn):
10059
    if self.expandnames_calls < 1:
10060
      raise errors.ProgrammerError("ExpandNames was not called")
10061

    
10062
    if self.op.notify_exec:
10063
      self._Notify(False, constants.JQT_EXEC, None)
10064

    
10065
    self.LogInfo("Executing")
10066

    
10067
    if self.op.log_messages:
10068
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10069
      for idx, msg in enumerate(self.op.log_messages):
10070
        self.LogInfo("Sending log message %s", idx + 1)
10071
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10072
        # Report how many test messages have been sent
10073
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10074

    
10075
    if self.op.fail:
10076
      raise errors.OpExecError("Opcode failure was requested")
10077

    
10078
    return True
10079

    
10080

    
10081
class IAllocator(object):
10082
  """IAllocator framework.
10083

10084
  An IAllocator instance has four sets of attributes:
10085
    - cfg that is needed to query the cluster
10086
    - input data (all members of the _KEYS class attribute are required)
10087
    - four buffer attributes (in|out_data|text), that represent the
10088
      input (to the external script) in text and data structure format,
10089
      and the output from it, again in two formats
10090
    - the result variables from the script (success, info, result) for
10091
      easy usage
10092

10093
  """
10094
  # pylint: disable-msg=R0902
10095
  # lots of instance attributes
10096
  _ALLO_KEYS = [
10097
    "name", "mem_size", "disks", "disk_template",
10098
    "os", "tags", "nics", "vcpus", "hypervisor",
10099
    ]
10100
  _RELO_KEYS = [
10101
    "name", "relocate_from",
10102
    ]
10103
  _EVAC_KEYS = [
10104
    "evac_nodes",
10105
    ]
10106

    
10107
  def __init__(self, cfg, rpc, mode, **kwargs):
10108
    self.cfg = cfg
10109
    self.rpc = rpc
10110
    # init buffer variables
10111
    self.in_text = self.out_text = self.in_data = self.out_data = None
10112
    # init all input fields so that pylint is happy
10113
    self.mode = mode
10114
    self.mem_size = self.disks = self.disk_template = None
10115
    self.os = self.tags = self.nics = self.vcpus = None
10116
    self.hypervisor = None
10117
    self.relocate_from = None
10118
    self.name = None
10119
    self.evac_nodes = None
10120
    # computed fields
10121
    self.required_nodes = None
10122
    # init result fields
10123
    self.success = self.info = self.result = None
10124
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10125
      keyset = self._ALLO_KEYS
10126
      fn = self._AddNewInstance
10127
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10128
      keyset = self._RELO_KEYS
10129
      fn = self._AddRelocateInstance
10130
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10131
      keyset = self._EVAC_KEYS
10132
      fn = self._AddEvacuateNodes
10133
    else:
10134
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10135
                                   " IAllocator" % self.mode)
10136
    for key in kwargs:
10137
      if key not in keyset:
10138
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10139
                                     " IAllocator" % key)
10140
      setattr(self, key, kwargs[key])
10141

    
10142
    for key in keyset:
10143
      if key not in kwargs:
10144
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10145
                                     " IAllocator" % key)
10146
    self._BuildInputData(fn)
10147

    
10148
  def _ComputeClusterData(self):
10149
    """Compute the generic allocator input data.
10150

10151
    This is the data that is independent of the actual operation.
10152

10153
    """
10154
    cfg = self.cfg
10155
    cluster_info = cfg.GetClusterInfo()
10156
    # cluster data
10157
    data = {
10158
      "version": constants.IALLOCATOR_VERSION,
10159
      "cluster_name": cfg.GetClusterName(),
10160
      "cluster_tags": list(cluster_info.GetTags()),
10161
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10162
      # we don't have job IDs
10163
      }
10164
    iinfo = cfg.GetAllInstancesInfo().values()
10165
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10166

    
10167
    # node data
10168
    node_results = {}
10169
    node_list = cfg.GetNodeList()
10170

    
10171
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10172
      hypervisor_name = self.hypervisor
10173
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10174
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10175
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10176
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10177

    
10178
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10179
                                        hypervisor_name)
10180
    node_iinfo = \
10181
      self.rpc.call_all_instances_info(node_list,
10182
                                       cluster_info.enabled_hypervisors)
10183
    for nname, nresult in node_data.items():
10184
      # first fill in static (config-based) values
10185
      ninfo = cfg.GetNodeInfo(nname)
10186
      pnr = {
10187
        "tags": list(ninfo.GetTags()),
10188
        "primary_ip": ninfo.primary_ip,
10189
        "secondary_ip": ninfo.secondary_ip,
10190
        "offline": ninfo.offline,
10191
        "drained": ninfo.drained,
10192
        "master_candidate": ninfo.master_candidate,
10193
        }
10194

    
10195
      if not (ninfo.offline or ninfo.drained):
10196
        nresult.Raise("Can't get data for node %s" % nname)
10197
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10198
                                nname)
10199
        remote_info = nresult.payload
10200

    
10201
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10202
                     'vg_size', 'vg_free', 'cpu_total']:
10203
          if attr not in remote_info:
10204
            raise errors.OpExecError("Node '%s' didn't return attribute"
10205
                                     " '%s'" % (nname, attr))
10206
          if not isinstance(remote_info[attr], int):
10207
            raise errors.OpExecError("Node '%s' returned invalid value"
10208
                                     " for '%s': %s" %
10209
                                     (nname, attr, remote_info[attr]))
10210
        # compute memory used by primary instances
10211
        i_p_mem = i_p_up_mem = 0
10212
        for iinfo, beinfo in i_list:
10213
          if iinfo.primary_node == nname:
10214
            i_p_mem += beinfo[constants.BE_MEMORY]
10215
            if iinfo.name not in node_iinfo[nname].payload:
10216
              i_used_mem = 0
10217
            else:
10218
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10219
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10220
            remote_info['memory_free'] -= max(0, i_mem_diff)
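            # Example: an instance configured with BE_MEMORY=1024 MiB whose
            # hypervisor currently reports only 512 MiB in use (e.g. because
            # it is ballooned down) lowers the node's free memory by the
            # remaining 512 MiB it is still entitled to claim.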
10221

    
10222
            if iinfo.admin_up:
10223
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10224

    
10225
        # compute memory used by instances
10226
        pnr_dyn = {
10227
          "total_memory": remote_info['memory_total'],
10228
          "reserved_memory": remote_info['memory_dom0'],
10229
          "free_memory": remote_info['memory_free'],
10230
          "total_disk": remote_info['vg_size'],
10231
          "free_disk": remote_info['vg_free'],
10232
          "total_cpus": remote_info['cpu_total'],
10233
          "i_pri_memory": i_p_mem,
10234
          "i_pri_up_memory": i_p_up_mem,
10235
          }
10236
        pnr.update(pnr_dyn)
10237

    
10238
      node_results[nname] = pnr
10239
    data["nodes"] = node_results
10240

    
10241
    # instance data
10242
    instance_data = {}
10243
    for iinfo, beinfo in i_list:
10244
      nic_data = []
10245
      for nic in iinfo.nics:
10246
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10247
        nic_dict = {"mac": nic.mac,
10248
                    "ip": nic.ip,
10249
                    "mode": filled_params[constants.NIC_MODE],
10250
                    "link": filled_params[constants.NIC_LINK],
10251
                   }
10252
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10253
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10254
        nic_data.append(nic_dict)
10255
      pir = {
10256
        "tags": list(iinfo.GetTags()),
10257
        "admin_up": iinfo.admin_up,
10258
        "vcpus": beinfo[constants.BE_VCPUS],
10259
        "memory": beinfo[constants.BE_MEMORY],
10260
        "os": iinfo.os,
10261
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10262
        "nics": nic_data,
10263
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10264
        "disk_template": iinfo.disk_template,
10265
        "hypervisor": iinfo.hypervisor,
10266
        }
10267
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10268
                                                 pir["disks"])
10269
      instance_data[iinfo.name] = pir
10270

    
10271
    data["instances"] = instance_data
10272

    
10273
    self.in_data = data
10274

    
10275
  def _AddNewInstance(self):
10276
    """Add new instance data to allocator structure.
10277

10278
    This, in combination with _ComputeClusterData, will create the
10279
    correct structure needed as input for the allocator.
10280

10281
    The checks for the completeness of the opcode must have already been
10282
    done.
10283

10284
    """
10285
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10286

    
10287
    if self.disk_template in constants.DTS_NET_MIRROR:
10288
      self.required_nodes = 2
10289
    else:
10290
      self.required_nodes = 1
10291
    request = {
10292
      "name": self.name,
10293
      "disk_template": self.disk_template,
10294
      "tags": self.tags,
10295
      "os": self.os,
10296
      "vcpus": self.vcpus,
10297
      "memory": self.mem_size,
10298
      "disks": self.disks,
10299
      "disk_space_total": disk_space,
10300
      "nics": self.nics,
10301
      "required_nodes": self.required_nodes,
10302
      }
10303
    return request
10304

    
10305
  def _AddRelocateInstance(self):
10306
    """Add relocate instance data to allocator structure.
10307

10308
    This, in combination with _ComputeClusterData, will create the
10309
    correct structure needed as input for the allocator.
10310

10311
    The checks for the completeness of the opcode must have already been
10312
    done.
10313

10314
    """
10315
    instance = self.cfg.GetInstanceInfo(self.name)
10316
    if instance is None:
10317
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10318
                                   " IAllocator" % self.name)
10319

    
10320
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10321
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10322
                                 errors.ECODE_INVAL)
10323

    
10324
    if len(instance.secondary_nodes) != 1:
10325
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10326
                                 errors.ECODE_STATE)
10327

    
10328
    self.required_nodes = 1
10329
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10330
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10331

    
10332
    request = {
10333
      "name": self.name,
10334
      "disk_space_total": disk_space,
10335
      "required_nodes": self.required_nodes,
10336
      "relocate_from": self.relocate_from,
10337
      }
10338
    return request
10339

    
10340
  def _AddEvacuateNodes(self):
10341
    """Add evacuate nodes data to allocator structure.
10342

10343
    """
10344
    request = {
10345
      "evac_nodes": self.evac_nodes
10346
      }
10347
    return request
10348

    
10349
  def _BuildInputData(self, fn):
10350
    """Build input data structures.
10351

10352
    """
10353
    self._ComputeClusterData()
10354

    
10355
    request = fn()
10356
    request["type"] = self.mode
10357
    self.in_data["request"] = request
10358

    
10359
    self.in_text = serializer.Dump(self.in_data)
10360

    
10361
  def Run(self, name, validate=True, call_fn=None):
10362
    """Run an instance allocator and return the results.
10363

10364
    """
10365
    if call_fn is None:
10366
      call_fn = self.rpc.call_iallocator_runner
10367

    
10368
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10369
    result.Raise("Failure while running the iallocator script")
10370

    
10371
    self.out_text = result.payload
10372
    if validate:
10373
      self._ValidateResult()
10374

    
10375
  def _ValidateResult(self):
10376
    """Process the allocator results.
10377

10378
    This will process and if successful save the result in
10379
    self.out_data and the other parameters.
10380

10381
    """
10382
    try:
10383
      rdict = serializer.Load(self.out_text)
10384
    except Exception, err:
10385
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10386

    
10387
    if not isinstance(rdict, dict):
10388
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10389

    
10390
    # TODO: remove backwards compatibility in later versions
10391
    if "nodes" in rdict and "result" not in rdict:
10392
      rdict["result"] = rdict["nodes"]
10393
      del rdict["nodes"]
10394

    
10395
    for key in "success", "info", "result":
10396
      if key not in rdict:
10397
        raise errors.OpExecError("Can't parse iallocator results:"
10398
                                 " missing key '%s'" % key)
10399
      setattr(self, key, rdict[key])
10400

    
10401
    if not isinstance(rdict["result"], list):
10402
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10403
                               " is not a list")
10404
    self.out_data = rdict
10405

    
10406
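
# For reference, a minimal sketch of what an external allocator script could
# look like on the other side of this protocol.  It is not part of this
# module and makes two assumptions: that the serialized input built by
# _BuildInputData is made available to the script in a file whose path is
# passed as its single command-line argument, and that the reply -- a dict
# with the "success", "info" and "result" keys checked by _ValidateResult --
# is written to standard output as JSON:
#
#   #!/usr/bin/python
#   import json
#   import sys
#
#   def main():
#     data = json.load(open(sys.argv[1]))
#     request = data["request"]
#     if "required_nodes" in request:
#       # trivial placement: the first N usable nodes
#       nodes = [name for (name, info) in data["nodes"].items()
#                if not (info["offline"] or info["drained"])]
#       reply = {"success": True, "info": "example allocator",
#                "result": nodes[:request["required_nodes"]]}
#     else:
#       reply = {"success": False, "info": "mode not supported by example",
#                "result": []}
#     json.dump(reply, sys.stdout)
#
#   if __name__ == "__main__":
#     main()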

    
10407
class LUTestAllocator(NoHooksLU):
10408
  """Run allocator tests.
10409

10410
  This LU runs the allocator tests.
10411

10412
  """
10413
  _OP_PARAMS = [
10414
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10415
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10416
    ("name", _NoDefault, _TNonEmptyString),
10417
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10418
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10419
               _TOr(_TNone, _TNonEmptyString))))),
10420
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
10421
    ("hypervisor", None, _TMaybeString),
10422
    ("allocator", None, _TMaybeString),
10423
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10424
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10425
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10426
    ("os", None, _TMaybeString),
10427
    ("disk_template", None, _TMaybeString),
10428
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10429
    ]
10430

    
10431
  def CheckPrereq(self):
10432
    """Check prerequisites.
10433

10434
    This checks the opcode parameters depending on the direction and mode of
    the test.
10435

10436
    """
10437
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10438
      for attr in ["mem_size", "disks", "disk_template",
10439
                   "os", "tags", "nics", "vcpus"]:
10440
        if not hasattr(self.op, attr):
10441
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10442
                                     attr, errors.ECODE_INVAL)
10443
      iname = self.cfg.ExpandInstanceName(self.op.name)
10444
      if iname is not None:
10445
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10446
                                   iname, errors.ECODE_EXISTS)
10447
      if not isinstance(self.op.nics, list):
10448
        raise errors.OpPrereqError("Invalid parameter 'nics'",
10449
                                   errors.ECODE_INVAL)
10450
      if not isinstance(self.op.disks, list):
10451
        raise errors.OpPrereqError("Invalid parameter 'disks'",
10452
                                   errors.ECODE_INVAL)
10453
      for row in self.op.disks:
10454
        if (not isinstance(row, dict) or
10455
            "size" not in row or
10456
            not isinstance(row["size"], int) or
10457
            "mode" not in row or
10458
            row["mode"] not in ['r', 'w']):
10459
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
10460
                                     " parameter", errors.ECODE_INVAL)
10461
      if self.op.hypervisor is None:
10462
        self.op.hypervisor = self.cfg.GetHypervisorType()
10463
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10464
      fname = _ExpandInstanceName(self.cfg, self.op.name)
10465
      self.op.name = fname
10466
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10467
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10468
      if not hasattr(self.op, "evac_nodes"):
10469
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10470
                                   " opcode input", errors.ECODE_INVAL)
10471
    else:
10472
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10473
                                 self.op.mode, errors.ECODE_INVAL)
10474

    
10475
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10476
      if self.op.allocator is None:
10477
        raise errors.OpPrereqError("Missing allocator name",
10478
                                   errors.ECODE_INVAL)
10479
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10480
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
10481
                                 self.op.direction, errors.ECODE_INVAL)
10482

    
10483
  def Exec(self, feedback_fn):
10484
    """Run the allocator test.
10485

10486
    """
10487
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10488
      ial = IAllocator(self.cfg, self.rpc,
10489
                       mode=self.op.mode,
10490
                       name=self.op.name,
10491
                       mem_size=self.op.mem_size,
10492
                       disks=self.op.disks,
10493
                       disk_template=self.op.disk_template,
10494
                       os=self.op.os,
10495
                       tags=self.op.tags,
10496
                       nics=self.op.nics,
10497
                       vcpus=self.op.vcpus,
10498
                       hypervisor=self.op.hypervisor,
10499
                       )
10500
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10501
      ial = IAllocator(self.cfg, self.rpc,
10502
                       mode=self.op.mode,
10503
                       name=self.op.name,
10504
                       relocate_from=list(self.relocate_from),
10505
                       )
10506
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10507
      ial = IAllocator(self.cfg, self.rpc,
10508
                       mode=self.op.mode,
10509
                       evac_nodes=self.op.evac_nodes)
10510
    else:
10511
      raise errors.ProgrammerError("Uncatched mode %s in"
10512
                                   " LUTestAllocator.Exec", self.op.mode)
10513

    
10514
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
10515
      result = ial.in_text
10516
    else:
10517
      ial.Run(self.op.allocator, validate=False)
10518
      result = ial.out_text
10519
    return result
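

# A minimal usage sketch (hypothetical helper, not used by any LU above) of
# the IAllocator framework: build a multi-evacuation request, run the
# external allocator by name ("hail" is only an example name), and return
# the validated result list.
def _ExampleRunEvacuationAllocator(lu, evac_nodes, allocator_name="hail"):
  """Asks an external allocator where to move instances from some nodes.

  """
  ial = IAllocator(lu.cfg, lu.rpc,
                   mode=constants.IALLOCATOR_MODE_MEVAC,
                   evac_nodes=evac_nodes)
  ial.Run(allocator_name, validate=True)
  if not ial.success:
    raise errors.OpExecError("Allocator '%s' failed: %s" %
                             (allocator_name, ial.info))
  return ial.result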