root / lib / cmdlib.py @ 21674464
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56

    
57
import ganeti.masterd.instance # pylint: disable-msg=W0611
58

    
59

    
60
# Modifiable default values; need to define these here before the
61
# actual LUs
62

    
63
def _EmptyList():
64
  """Returns an empty list.
65

66
  """
67
  return []
68

    
69

    
70
def _EmptyDict():
71
  """Returns an empty dict.
72

73
  """
74
  return {}
75

    
76

    
77
#: The without-default default value
78
_NoDefault = object()
79

    
80

    
81
#: The no-type (value too complex to check it in the type system)
82
_NoType = object()
83

    
84

    
85
# Some basic types
86
def _TNotNone(val):
87
  """Checks if the given value is not None.
88

89
  """
90
  return val is not None
91

    
92

    
93
def _TNone(val):
94
  """Checks if the given value is None.
95

96
  """
97
  return val is None
98

    
99

    
100
def _TBool(val):
101
  """Checks if the given value is a boolean.
102

103
  """
104
  return isinstance(val, bool)
105

    
106

    
107
def _TInt(val):
108
  """Checks if the given value is an integer.
109

110
  """
111
  return isinstance(val, int)
112

    
113

    
114
def _TFloat(val):
115
  """Checks if the given value is a float.
116

117
  """
118
  return isinstance(val, float)
119

    
120

    
121
def _TString(val):
122
  """Checks if the given value is a string.
123

124
  """
125
  return isinstance(val, basestring)
126

    
127

    
128
def _TTrue(val):
129
  """Checks if a given value evaluates to a boolean True value.
130

131
  """
132
  return bool(val)
133

    
134

    
135
def _TElemOf(target_list):
136
  """Builds a function that checks if a given value is a member of a list.
137

138
  """
139
  return lambda val: val in target_list
140

    
141

    
142
# Container types
143
def _TList(val):
144
  """Checks if the given value is a list.
145

146
  """
147
  return isinstance(val, list)
148

    
149

    
150
def _TDict(val):
151
  """Checks if the given value is a dictionary.
152

153
  """
154
  return isinstance(val, dict)
155

    
156

    
157
def _TIsLength(size):
158
  """Check is the given container is of the given size.
159

160
  """
161
  return lambda container: len(container) == size
162

    
163

    
164
# Combinator types
165
def _TAnd(*args):
166
  """Combine multiple functions using an AND operation.
167

168
  """
169
  def fn(val):
170
    return compat.all(t(val) for t in args)
171
  return fn
172

    
173

    
174
def _TOr(*args):
175
  """Combine multiple functions using an AND operation.
176

177
  """
178
  def fn(val):
179
    return compat.any(t(val) for t in args)
180
  return fn
181

    
182

    
183
def _TMap(fn, test):
184
  """Checks that a modified version of the argument passes the given test.
185

186
  """
187
  return lambda val: test(fn(val))
188

    
189

    
190
# Type aliases
191

    
192
#: a non-empty string
193
_TNonEmptyString = _TAnd(_TString, _TTrue)
194

    
195

    
196
#: a maybe non-empty string
197
_TMaybeString = _TOr(_TNonEmptyString, _TNone)
198

    
199

    
200
#: a maybe boolean (bool or none)
201
_TMaybeBool = _TOr(_TBool, _TNone)
202

    
203

    
204
#: a positive integer
205
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
206

    
207
#: a strictly positive integer
208
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
209

    
210

    
211
def _TListOf(my_type):
212
  """Checks if a given value is a list with all elements of the same type.
213

214
  """
215
  return _TAnd(_TList,
216
               lambda lst: compat.all(my_type(v) for v in lst))
217

    
218

    
219
def _TDictOf(key_type, val_type):
220
  """Checks a dict type for the type of its key/values.
221

222
  """
223
  return _TAnd(_TDict,
224
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225
                                and compat.all(val_type(v)
226
                                               for v in my_dict.values())))
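# Illustrative sketch, not part of the original module: these small
# predicates compose into reusable parameter checks, e.g.:
#
#   _TListOf(_TNonEmptyString)(["node1", "node2"])          # -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])               # -> False
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"mem": 128}) # -> True
#   _TOr(_TNone, _TElemOf([1, 2]))(None)                    # -> True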
227

    
228

    
229
# Common opcode attributes
230

    
231
#: output fields for a query operation
232
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
233

    
234

    
235
#: the shutdown timeout
236
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
237
                     _TPositiveInt)
238

    
239
#: the force parameter
240
_PForce = ("force", False, _TBool)
241

    
242
#: a required instance name (for single-instance LUs)
243
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
244

    
245

    
246
#: a required node name (for single-node LUs)
247
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
248

    
249
#: the migration type (live/non-live)
250
_PMigrationMode = ("mode", None, _TOr(_TNone,
251
                                      _TElemOf(constants.HT_MIGRATION_MODES)))
252

    
253
#: the obsolete 'live' mode (boolean)
254
_PMigrationLive = ("live", None, _TMaybeBool)
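# Illustrative sketch (hypothetical LU, not in the original file): an
# _OP_PARAMS declaration mixes the shared attribute tuples above with
# per-LU entries of the form (name, default, check-callable), e.g.:
#
#   class LUExampleOperation(NoHooksLU):
#     _OP_PARAMS = [
#       _PInstanceName,                               # required
#       _PForce,                                      # defaults to False
#       ("extra_args", _EmptyList, _TListOf(_TNonEmptyString)),
#     ]
#
# Missing attributes receive their defaults (callable defaults such as
# _EmptyList are invoked), and values failing the check make
# LogicalUnit.__init__ raise errors.OpPrereqError.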
255

    
256

    
257
# End types
258
class LogicalUnit(object):
259
  """Logical Unit base class.
260

261
  Subclasses must follow these rules:
262
    - implement ExpandNames
263
    - implement CheckPrereq (except when tasklets are used)
264
    - implement Exec (except when tasklets are used)
265
    - implement BuildHooksEnv
266
    - redefine HPATH and HTYPE
267
    - optionally redefine their run requirements:
268
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
269

270
  Note that all commands require root permissions.
271

272
  @ivar dry_run_result: the value (if any) that will be returned to the caller
273
      in dry-run mode (signalled by opcode dry_run parameter)
274
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
275
      they should get if not already defined, and types they must match
276

277
  """
278
  HPATH = None
279
  HTYPE = None
280
  _OP_PARAMS = []
281
  REQ_BGL = True
282

    
283
  def __init__(self, processor, op, context, rpc):
284
    """Constructor for LogicalUnit.
285

286
    This needs to be overridden in derived classes in order to check op
287
    validity.
288

289
    """
290
    self.proc = processor
291
    self.op = op
292
    self.cfg = context.cfg
293
    self.context = context
294
    self.rpc = rpc
295
    # Dicts used to declare locking needs to mcpu
296
    self.needed_locks = None
297
    self.acquired_locks = {}
298
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
299
    self.add_locks = {}
300
    self.remove_locks = {}
301
    # Used to force good behavior when calling helper functions
302
    self.recalculate_locks = {}
303
    self.__ssh = None
304
    # logging
305
    self.Log = processor.Log # pylint: disable-msg=C0103
306
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309
    # support for dry-run
310
    self.dry_run_result = None
311
    # support for generic debug attribute
312
    if (not hasattr(self.op, "debug_level") or
313
        not isinstance(self.op.debug_level, int)):
314
      self.op.debug_level = 0
315

    
316
    # Tasklets
317
    self.tasklets = None
318

    
319
    # The new kind-of-type-system
320
    op_id = self.op.OP_ID
321
    for attr_name, aval, test in self._OP_PARAMS:
322
      if not hasattr(op, attr_name):
323
        if aval == _NoDefault:
324
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325
                                     (op_id, attr_name), errors.ECODE_INVAL)
326
        else:
327
          if callable(aval):
328
            dval = aval()
329
          else:
330
            dval = aval
331
          setattr(self.op, attr_name, dval)
332
      attr_val = getattr(op, attr_name)
333
      if test == _NoType:
334
        # no tests here
335
        continue
336
      if not callable(test):
337
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338
                                     " given type is not a proper type (%s)" %
339
                                     (op_id, attr_name, test))
340
      if not test(attr_val):
341
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
343
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344
                                   (op_id, attr_name), errors.ECODE_INVAL)
345

    
346
    self.CheckArguments()
347

    
348
  def __GetSSH(self):
349
    """Returns the SshRunner object
350

351
    """
352
    if not self.__ssh:
353
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
354
    return self.__ssh
355

    
356
  ssh = property(fget=__GetSSH)
357

    
358
  def CheckArguments(self):
359
    """Check syntactic validity for the opcode arguments.
360

361
    This method is for doing a simple syntactic check and ensuring the
362
    validity of opcode parameters, without any cluster-related
363
    checks. While the same can be accomplished in ExpandNames and/or
364
    CheckPrereq, doing these separately is better because:
365

366
      - ExpandNames is left as a purely lock-related function
367
      - CheckPrereq is run after we have acquired locks (and possibly
368
        waited for them)
369

370
    The function is allowed to change the self.op attribute so that
371
    later methods no longer need to worry about missing parameters.
372

373
    """
374
    pass
375

    
376
  def ExpandNames(self):
377
    """Expand names for this LU.
378

379
    This method is called before starting to execute the opcode, and it should
380
    update all the parameters of the opcode to their canonical form (e.g. a
381
    short node name must be fully expanded after this method has successfully
382
    completed). This way locking, hooks, logging, etc. can work correctly.
383

384
    LUs which implement this method must also populate the self.needed_locks
385
    member, as a dict with lock levels as keys, and a list of needed lock names
386
    as values. Rules:
387

388
      - use an empty dict if you don't need any lock
389
      - if you don't need any lock at a particular level omit that level
390
      - don't put anything for the BGL level
391
      - if you want all locks at a level use locking.ALL_SET as a value
392

393
    If you need to share locks (rather than acquire them exclusively) at one
394
    level you can modify self.share_locks, setting a true value (usually 1) for
395
    that level. By default locks are not shared.
396

397
    This function can also define a list of tasklets, which then will be
398
    executed in order instead of the usual LU-level CheckPrereq and Exec
399
    functions, if those are not defined by the LU.
400

401
    Examples::
402

403
      # Acquire all nodes and one instance
404
      self.needed_locks = {
405
        locking.LEVEL_NODE: locking.ALL_SET,
406
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
407
      }
408
      # Acquire just two nodes
409
      self.needed_locks = {
410
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
411
      }
412
      # Acquire no locks
413
      self.needed_locks = {} # No, you can't leave it to the default value None
414

415
    """
416
    # The implementation of this method is mandatory only if the new LU is
417
    # concurrent, so that old LUs don't need to be changed all at the same
418
    # time.
419
    if self.REQ_BGL:
420
      self.needed_locks = {} # Exclusive LUs don't need locks.
421
    else:
422
      raise NotImplementedError
423

    
424
  def DeclareLocks(self, level):
425
    """Declare LU locking needs for a level
426

427
    While most LUs can just declare their locking needs at ExpandNames time,
428
    sometimes there's the need to calculate some locks after having acquired
429
    the ones before. This function is called just before acquiring locks at a
430
    particular level, but after acquiring the ones at lower levels, and permits
431
    such calculations. It can be used to modify self.needed_locks, and by
432
    default it does nothing.
433

434
    This function is only called if you have something already set in
435
    self.needed_locks for the level.
436

437
    @param level: Locking level which is going to be locked
438
    @type level: member of ganeti.locking.LEVELS
439

440
    """
441

    
442
  def CheckPrereq(self):
443
    """Check prerequisites for this LU.
444

445
    This method should check that the prerequisites for the execution
446
    of this LU are fulfilled. It can do internode communication, but
447
    it should be idempotent - no cluster or system changes are
448
    allowed.
449

450
    The method should raise errors.OpPrereqError in case something is
451
    not fulfilled. Its return value is ignored.
452

453
    This method should also update all the parameters of the opcode to
454
    their canonical form if it hasn't been done by ExpandNames before.
455

456
    """
457
    if self.tasklets is not None:
458
      for (idx, tl) in enumerate(self.tasklets):
459
        logging.debug("Checking prerequisites for tasklet %s/%s",
460
                      idx + 1, len(self.tasklets))
461
        tl.CheckPrereq()
462
    else:
463
      pass
464

    
465
  def Exec(self, feedback_fn):
466
    """Execute the LU.
467

468
    This method should implement the actual work. It should raise
469
    errors.OpExecError for failures that are somewhat dealt with in
470
    code, or expected.
471

472
    """
473
    if self.tasklets is not None:
474
      for (idx, tl) in enumerate(self.tasklets):
475
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
476
        tl.Exec(feedback_fn)
477
    else:
478
      raise NotImplementedError
479

    
480
  def BuildHooksEnv(self):
481
    """Build hooks environment for this LU.
482

483
    This method should return a three-element tuple consisting of: a dict
484
    containing the environment that will be used for running the
485
    specific hook for this LU, a list of node names on which the hook
486
    should run before the execution, and a list of node names on which
487
    the hook should run after the execution.
488

489
    The keys of the dict must not be prefixed with 'GANETI_', as this
490
    prefix is handled by the hooks runner. Also note additional keys will be
491
    added by the hooks runner. If the LU doesn't define any
492
    environment, an empty dict (and not None) should be returned.
493

494
    If no nodes apply, an empty list (and not None) should be returned.
495

496
    Note that if the HPATH for a LU class is None, this function will
497
    not be called.
498

499
    """
500
    raise NotImplementedError
501

    
502
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503
    """Notify the LU about the results of its hooks.
504

505
    This method is called every time a hooks phase is executed, and notifies
506
    the Logical Unit about the hooks' result. The LU can then use it to alter
507
    its result based on the hooks.  By default the method does nothing and the
508
    previous result is passed back unchanged, but any LU can override it if it
509
    wants to use the local cluster hook-scripts somehow.
510

511
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
512
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513
    @param hook_results: the results of the multi-node hooks rpc call
514
    @param feedback_fn: function used to send feedback back to the caller
515
    @param lu_result: the previous Exec result this LU had, or None
516
        in the PRE phase
517
    @return: the new Exec result, based on the previous result
518
        and hook results
519

520
    """
521
    # API must be kept, thus we ignore the "unused argument" and "could
522
    # be a function" warnings
523
    # pylint: disable-msg=W0613,R0201
524
    return lu_result
525

    
526
  def _ExpandAndLockInstance(self):
527
    """Helper function to expand and lock an instance.
528

529
    Many LUs that work on an instance take its name in self.op.instance_name
530
    and need to expand it and then declare the expanded name for locking. This
531
    function does it, and then updates self.op.instance_name to the expanded
532
    name. It also initializes needed_locks as a dict, if this hasn't been done
533
    before.
534

535
    """
536
    if self.needed_locks is None:
537
      self.needed_locks = {}
538
    else:
539
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540
        "_ExpandAndLockInstance called with instance-level locks set"
541
    self.op.instance_name = _ExpandInstanceName(self.cfg,
542
                                                self.op.instance_name)
543
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
544

    
545
  def _LockInstancesNodes(self, primary_only=False):
546
    """Helper function to declare instances' nodes for locking.
547

548
    This function should be called after locking one or more instances to lock
549
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550
    with all primary or secondary nodes for instances already locked and
551
    present in self.needed_locks[locking.LEVEL_INSTANCE].
552

553
    It should be called from DeclareLocks, and for safety only works if
554
    self.recalculate_locks[locking.LEVEL_NODE] is set.
555

556
    In the future it may grow parameters to just lock some instance's nodes, or
557
    to just lock primaries or secondary nodes, if needed.
558

559
    It should be called in DeclareLocks in a way similar to::
560

561
      if level == locking.LEVEL_NODE:
562
        self._LockInstancesNodes()
563

564
    @type primary_only: boolean
565
    @param primary_only: only lock primary nodes of locked instances
566

567
    """
568
    assert locking.LEVEL_NODE in self.recalculate_locks, \
569
      "_LockInstancesNodes helper function called with no nodes to recalculate"
570

    
571
    # TODO: check if we've really been called with the instance locks held
572

    
573
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574
    # future we might want to have different behaviors depending on the value
575
    # of self.recalculate_locks[locking.LEVEL_NODE]
576
    wanted_nodes = []
577
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578
      instance = self.context.cfg.GetInstanceInfo(instance_name)
579
      wanted_nodes.append(instance.primary_node)
580
      if not primary_only:
581
        wanted_nodes.extend(instance.secondary_nodes)
582

    
583
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
587

    
588
    del self.recalculate_locks[locking.LEVEL_NODE]
589

    
590

    
591
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592
  """Simple LU which runs no hooks.
593

594
  This LU is intended as a parent for other LogicalUnits which will
595
  run no hooks, in order to reduce duplicate code.
596

597
  """
598
  HPATH = None
599
  HTYPE = None
600

    
601
  def BuildHooksEnv(self):
602
    """Empty BuildHooksEnv for NoHooksLu.
603

604
    This just raises an error.
605

606
    """
607
    assert False, "BuildHooksEnv called for NoHooksLUs"
608

    
609

    
610
class Tasklet:
611
  """Tasklet base class.
612

613
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
615
  tasklets know nothing about locks.
616

617
  Subclasses must follow these rules:
618
    - Implement CheckPrereq
619
    - Implement Exec
620

621
  """
622
  def __init__(self, lu):
623
    self.lu = lu
624

    
625
    # Shortcuts
626
    self.cfg = lu.cfg
627
    self.rpc = lu.rpc
628

    
629
  def CheckPrereq(self):
630
    """Check prerequisites for this tasklets.
631

632
    This method should check whether the prerequisites for the execution of
633
    this tasklet are fulfilled. It can do internode communication, but it
634
    should be idempotent - no cluster or system changes are allowed.
635

636
    The method should raise errors.OpPrereqError in case something is not
637
    fulfilled. Its return value is ignored.
638

639
    This method should also update all parameters to their canonical form if it
640
    hasn't been done before.
641

642
    """
643
    pass
644

    
645
  def Exec(self, feedback_fn):
646
    """Execute the tasklet.
647

648
    This method should implement the actual work. It should raise
649
    errors.OpExecError for failures that are somewhat dealt with in code, or
650
    expected.
651

652
    """
653
    raise NotImplementedError
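# Illustrative sketch (hypothetical tasklet, not in the original file):
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#     def Exec(self, feedback_fn):
#       feedback_fn("example tasklet running")
#
# An LU would set "self.tasklets = [_ExampleTasklet(self)]" in ExpandNames;
# LogicalUnit.CheckPrereq and Exec then iterate over the tasklets instead
# of requiring LU-level implementations.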
654

    
655

    
656
def _GetWantedNodes(lu, nodes):
657
  """Returns list of checked and expanded node names.
658

659
  @type lu: L{LogicalUnit}
660
  @param lu: the logical unit on whose behalf we execute
661
  @type nodes: list
662
  @param nodes: non-empty list of node names to be expanded
663
  @rtype: list
664
  @return: the list of nodes, sorted
665
  @raise errors.ProgrammerError: if the nodes parameter is empty or None
666

667
  """
668
  if not nodes:
669
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670
      " non-empty list of nodes whose name is to be expanded.")
671

    
672
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673
  return utils.NiceSort(wanted)
674

    
675

    
676
def _GetWantedInstances(lu, instances):
677
  """Returns list of checked and expanded instance names.
678

679
  @type lu: L{LogicalUnit}
680
  @param lu: the logical unit on whose behalf we execute
681
  @type instances: list
682
  @param instances: list of instance names or None for all instances
683
  @rtype: list
684
  @return: the list of instances, sorted
685
  @raise errors.OpPrereqError: if the instances parameter is wrong type
686
  @raise errors.OpPrereqError: if any of the passed instances is not found
687

688
  """
689
  if instances:
690
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
691
  else:
692
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
693
  return wanted
694

    
695

    
696
def _GetUpdatedParams(old_params, update_dict,
697
                      use_default=True, use_none=False):
698
  """Return the new version of a parameter dictionary.
699

700
  @type old_params: dict
701
  @param old_params: old parameters
702
  @type update_dict: dict
703
  @param update_dict: dict containing new parameter values, or
704
      constants.VALUE_DEFAULT to reset the parameter to its default
705
      value
706
  @type use_default: boolean
707
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708
      values as 'to be deleted' values
709
  @type use_none: boolean
710
  @param use_none: whether to recognise C{None} values as 'to be
711
      deleted' values
712
  @rtype: dict
713
  @return: the new parameter dictionary
714

715
  """
716
  params_copy = copy.deepcopy(old_params)
717
  for key, val in update_dict.iteritems():
718
    if ((use_default and val == constants.VALUE_DEFAULT) or
719
        (use_none and val is None)):
720
      try:
721
        del params_copy[key]
722
      except KeyError:
723
        pass
724
    else:
725
      params_copy[key] = val
726
  return params_copy
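# Illustrative sketch, not part of the original module: expected behaviour
# of _GetUpdatedParams, assuming constants.VALUE_DEFAULT is the string
# "default":
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": "default", "c": 3})
#   # -> {"a": 1, "c": 3}   (the VALUE_DEFAULT marker deletes "b")
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#   # -> {}                 (None deletes the key when use_none=True)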
727

    
728

    
729
def _CheckOutputFields(static, dynamic, selected):
730
  """Checks whether all selected fields are valid.
731

732
  @type static: L{utils.FieldSet}
733
  @param static: static fields set
734
  @type dynamic: L{utils.FieldSet}
735
  @param dynamic: dynamic fields set
736

737
  """
738
  f = utils.FieldSet()
739
  f.Extend(static)
740
  f.Extend(dynamic)
741

    
742
  delta = f.NonMatching(selected)
743
  if delta:
744
    raise errors.OpPrereqError("Unknown output fields selected: %s"
745
                               % ",".join(delta), errors.ECODE_INVAL)
746

    
747

    
748
def _CheckGlobalHvParams(params):
749
  """Validates that given hypervisor params are not global ones.
750

751
  This will ensure that instances don't get customised versions of
752
  global params.
753

754
  """
755
  used_globals = constants.HVC_GLOBALS.intersection(params)
756
  if used_globals:
757
    msg = ("The following hypervisor parameters are global and cannot"
758
           " be customized at instance level, please modify them at"
759
           " cluster level: %s" % utils.CommaJoin(used_globals))
760
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
761

    
762

    
763
def _CheckNodeOnline(lu, node):
764
  """Ensure that a given node is online.
765

766
  @param lu: the LU on behalf of which we make the check
767
  @param node: the node to check
768
  @raise errors.OpPrereqError: if the node is offline
769

770
  """
771
  if lu.cfg.GetNodeInfo(node).offline:
772
    raise errors.OpPrereqError("Can't use offline node %s" % node,
773
                               errors.ECODE_INVAL)
774

    
775

    
776
def _CheckNodeNotDrained(lu, node):
777
  """Ensure that a given node is not drained.
778

779
  @param lu: the LU on behalf of which we make the check
780
  @param node: the node to check
781
  @raise errors.OpPrereqError: if the node is drained
782

783
  """
784
  if lu.cfg.GetNodeInfo(node).drained:
785
    raise errors.OpPrereqError("Can't use drained node %s" % node,
786
                               errors.ECODE_INVAL)
787

    
788

    
789
def _CheckNodeHasOS(lu, node, os_name, force_variant):
790
  """Ensure that a node supports a given OS.
791

792
  @param lu: the LU on behalf of which we make the check
793
  @param node: the node to check
794
  @param os_name: the OS to query about
795
  @param force_variant: whether to ignore variant errors
796
  @raise errors.OpPrereqError: if the node is not supporting the OS
797

798
  """
799
  result = lu.rpc.call_os_get(node, os_name)
800
  result.Raise("OS '%s' not in supported OS list for node %s" %
801
               (os_name, node),
802
               prereq=True, ecode=errors.ECODE_INVAL)
803
  if not force_variant:
804
    _CheckOSVariant(result.payload, os_name)
805

    
806

    
807
def _RequireFileStorage():
808
  """Checks that file storage is enabled.
809

810
  @raise errors.OpPrereqError: when file storage is disabled
811

812
  """
813
  if not constants.ENABLE_FILE_STORAGE:
814
    raise errors.OpPrereqError("File storage disabled at configure time",
815
                               errors.ECODE_INVAL)
816

    
817

    
818
def _CheckDiskTemplate(template):
819
  """Ensure a given disk template is valid.
820

821
  """
822
  if template not in constants.DISK_TEMPLATES:
823
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
824
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826
  if template == constants.DT_FILE:
827
    _RequireFileStorage()
828
  return True
829

    
830

    
831
def _CheckStorageType(storage_type):
832
  """Ensure a given storage type is valid.
833

834
  """
835
  if storage_type not in constants.VALID_STORAGE_TYPES:
836
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
837
                               errors.ECODE_INVAL)
838
  if storage_type == constants.ST_FILE:
839
    _RequireFileStorage()
840
  return True
841

    
842

    
843
def _GetClusterDomainSecret():
844
  """Reads the cluster domain secret.
845

846
  """
847
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
848
                               strict=True)
849

    
850

    
851
def _CheckInstanceDown(lu, instance, reason):
852
  """Ensure that an instance is not running."""
853
  if instance.admin_up:
854
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855
                               (instance.name, reason), errors.ECODE_STATE)
856

    
857
  pnode = instance.primary_node
858
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
860
              prereq=True, ecode=errors.ECODE_ENVIRON)
861

    
862
  if instance.name in ins_l.payload:
863
    raise errors.OpPrereqError("Instance %s is running, %s" %
864
                               (instance.name, reason), errors.ECODE_STATE)
865

    
866

    
867
def _ExpandItemName(fn, name, kind):
868
  """Expand an item name.
869

870
  @param fn: the function to use for expansion
871
  @param name: requested item name
872
  @param kind: text description ('Node' or 'Instance')
873
  @return: the resolved (full) name
874
  @raise errors.OpPrereqError: if the item is not found
875

876
  """
877
  full_name = fn(name)
878
  if full_name is None:
879
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
880
                               errors.ECODE_NOENT)
881
  return full_name
882

    
883

    
884
def _ExpandNodeName(cfg, name):
885
  """Wrapper over L{_ExpandItemName} for nodes."""
886
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
887

    
888

    
889
def _ExpandInstanceName(cfg, name):
890
  """Wrapper over L{_ExpandItemName} for instance."""
891
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
892

    
893

    
894
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895
                          memory, vcpus, nics, disk_template, disks,
896
                          bep, hvp, hypervisor_name):
897
  """Builds instance related env variables for hooks
898

899
  This builds the hook environment from individual variables.
900

901
  @type name: string
902
  @param name: the name of the instance
903
  @type primary_node: string
904
  @param primary_node: the name of the instance's primary node
905
  @type secondary_nodes: list
906
  @param secondary_nodes: list of secondary nodes as strings
907
  @type os_type: string
908
  @param os_type: the name of the instance's OS
909
  @type status: boolean
910
  @param status: the should_run status of the instance
911
  @type memory: string
912
  @param memory: the memory size of the instance
913
  @type vcpus: string
914
  @param vcpus: the count of VCPUs the instance has
915
  @type nics: list
916
  @param nics: list of tuples (ip, mac, mode, link) representing
917
      the NICs the instance has
918
  @type disk_template: string
919
  @param disk_template: the disk template of the instance
920
  @type disks: list
921
  @param disks: the list of (size, mode) pairs
922
  @type bep: dict
923
  @param bep: the backend parameters for the instance
924
  @type hvp: dict
925
  @param hvp: the hypervisor parameters for the instance
926
  @type hypervisor_name: string
927
  @param hypervisor_name: the hypervisor for the instance
928
  @rtype: dict
929
  @return: the hook environment for this instance
930

931
  """
932
  if status:
933
    str_status = "up"
934
  else:
935
    str_status = "down"
936
  env = {
937
    "OP_TARGET": name,
938
    "INSTANCE_NAME": name,
939
    "INSTANCE_PRIMARY": primary_node,
940
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941
    "INSTANCE_OS_TYPE": os_type,
942
    "INSTANCE_STATUS": str_status,
943
    "INSTANCE_MEMORY": memory,
944
    "INSTANCE_VCPUS": vcpus,
945
    "INSTANCE_DISK_TEMPLATE": disk_template,
946
    "INSTANCE_HYPERVISOR": hypervisor_name,
947
  }
948

    
949
  if nics:
950
    nic_count = len(nics)
951
    for idx, (ip, mac, mode, link) in enumerate(nics):
952
      if ip is None:
953
        ip = ""
954
      env["INSTANCE_NIC%d_IP" % idx] = ip
955
      env["INSTANCE_NIC%d_MAC" % idx] = mac
956
      env["INSTANCE_NIC%d_MODE" % idx] = mode
957
      env["INSTANCE_NIC%d_LINK" % idx] = link
958
      if mode == constants.NIC_MODE_BRIDGED:
959
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
960
  else:
961
    nic_count = 0
962

    
963
  env["INSTANCE_NIC_COUNT"] = nic_count
964

    
965
  if disks:
966
    disk_count = len(disks)
967
    for idx, (size, mode) in enumerate(disks):
968
      env["INSTANCE_DISK%d_SIZE" % idx] = size
969
      env["INSTANCE_DISK%d_MODE" % idx] = mode
970
  else:
971
    disk_count = 0
972

    
973
  env["INSTANCE_DISK_COUNT"] = disk_count
974

    
975
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
976
    for key, value in source.items():
977
      env["INSTANCE_%s_%s" % (kind, key)] = value
978

    
979
  return env
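# Illustrative sketch (hypothetical values): for an instance with one
# bridged NIC and one disk, the environment built above would contain keys
# such as OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP/_MAC/_MODE/_LINK/_BRIDGE,
# INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE/_MODE and one INSTANCE_BE_* /
# INSTANCE_HV_* entry per backend/hypervisor parameter.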
980

    
981

    
982
def _NICListToTuple(lu, nics):
983
  """Build a list of nic information tuples.
984

985
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986
  value in LUQueryInstanceData.
987

988
  @type lu:  L{LogicalUnit}
989
  @param lu: the logical unit on whose behalf we execute
990
  @type nics: list of L{objects.NIC}
991
  @param nics: list of nics to convert to hooks tuples
992

993
  """
994
  hooks_nics = []
995
  cluster = lu.cfg.GetClusterInfo()
996
  for nic in nics:
997
    ip = nic.ip
998
    mac = nic.mac
999
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000
    mode = filled_params[constants.NIC_MODE]
1001
    link = filled_params[constants.NIC_LINK]
1002
    hooks_nics.append((ip, mac, mode, link))
1003
  return hooks_nics
1004

    
1005

    
1006
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007
  """Builds instance related env variables for hooks from an object.
1008

1009
  @type lu: L{LogicalUnit}
1010
  @param lu: the logical unit on whose behalf we execute
1011
  @type instance: L{objects.Instance}
1012
  @param instance: the instance for which we should build the
1013
      environment
1014
  @type override: dict
1015
  @param override: dictionary with key/values that will override
1016
      our values
1017
  @rtype: dict
1018
  @return: the hook environment dictionary
1019

1020
  """
1021
  cluster = lu.cfg.GetClusterInfo()
1022
  bep = cluster.FillBE(instance)
1023
  hvp = cluster.FillHV(instance)
1024
  args = {
1025
    'name': instance.name,
1026
    'primary_node': instance.primary_node,
1027
    'secondary_nodes': instance.secondary_nodes,
1028
    'os_type': instance.os,
1029
    'status': instance.admin_up,
1030
    'memory': bep[constants.BE_MEMORY],
1031
    'vcpus': bep[constants.BE_VCPUS],
1032
    'nics': _NICListToTuple(lu, instance.nics),
1033
    'disk_template': instance.disk_template,
1034
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1035
    'bep': bep,
1036
    'hvp': hvp,
1037
    'hypervisor_name': instance.hypervisor,
1038
  }
1039
  if override:
1040
    args.update(override)
1041
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1042

    
1043

    
1044
def _AdjustCandidatePool(lu, exceptions):
1045
  """Adjust the candidate pool after node operations.
1046

1047
  """
1048
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1049
  if mod_list:
1050
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1051
               utils.CommaJoin(node.name for node in mod_list))
1052
    for name in mod_list:
1053
      lu.context.ReaddNode(name)
1054
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1055
  if mc_now > mc_max:
1056
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1057
               (mc_now, mc_max))
1058

    
1059

    
1060
def _DecideSelfPromotion(lu, exceptions=None):
1061
  """Decide whether I should promote myself as a master candidate.
1062

1063
  """
1064
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066
  # the new node will increase mc_max by one, so:
1067
  mc_should = min(mc_should + 1, cp_size)
1068
  return mc_now < mc_should
1069

    
1070

    
1071
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072
  """Check that the brigdes needed by a list of nics exist.
1073

1074
  """
1075
  cluster = lu.cfg.GetClusterInfo()
1076
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077
  brlist = [params[constants.NIC_LINK] for params in paramslist
1078
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1079
  if brlist:
1080
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1081
    result.Raise("Error checking bridges on destination node '%s'" %
1082
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1083

    
1084

    
1085
def _CheckInstanceBridgesExist(lu, instance, node=None):
1086
  """Check that the brigdes needed by an instance exist.
1087

1088
  """
1089
  if node is None:
1090
    node = instance.primary_node
1091
  _CheckNicsBridgesExist(lu, instance.nics, node)
1092

    
1093

    
1094
def _CheckOSVariant(os_obj, name):
1095
  """Check whether an OS name conforms to the os variants specification.
1096

1097
  @type os_obj: L{objects.OS}
1098
  @param os_obj: OS object to check
1099
  @type name: string
1100
  @param name: OS name passed by the user, to check for validity
1101

1102
  """
1103
  if not os_obj.supported_variants:
1104
    return
1105
  variant = objects.OS.GetVariant(name)
1106
  if not variant:
1107
    raise errors.OpPrereqError("OS name must include a variant",
1108
                               errors.ECODE_INVAL)
1109

    
1110
  if variant not in os_obj.supported_variants:
1111
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1112

    
1113

    
1114
def _GetNodeInstancesInner(cfg, fn):
1115
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1116

    
1117

    
1118
def _GetNodeInstances(cfg, node_name):
1119
  """Returns a list of all primary and secondary instances on a node.
1120

1121
  """
1122

    
1123
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1124

    
1125

    
1126
def _GetNodePrimaryInstances(cfg, node_name):
1127
  """Returns primary instances on a node.
1128

1129
  """
1130
  return _GetNodeInstancesInner(cfg,
1131
                                lambda inst: node_name == inst.primary_node)
1132

    
1133

    
1134
def _GetNodeSecondaryInstances(cfg, node_name):
1135
  """Returns secondary instances on a node.
1136

1137
  """
1138
  return _GetNodeInstancesInner(cfg,
1139
                                lambda inst: node_name in inst.secondary_nodes)
1140

    
1141

    
1142
def _GetStorageTypeArgs(cfg, storage_type):
1143
  """Returns the arguments for a storage type.
1144

1145
  """
1146
  # Special case for file storage
1147
  if storage_type == constants.ST_FILE:
1148
    # storage.FileStorage wants a list of storage directories
1149
    return [[cfg.GetFileStorageDir()]]
1150

    
1151
  return []
1152

    
1153

    
1154
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1155
  faulty = []
1156

    
1157
  for dev in instance.disks:
1158
    cfg.SetDiskID(dev, node_name)
1159

    
1160
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161
  result.Raise("Failed to get disk status from node %s" % node_name,
1162
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1163

    
1164
  for idx, bdev_status in enumerate(result.payload):
1165
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1166
      faulty.append(idx)
1167

    
1168
  return faulty
1169

    
1170

    
1171
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172
  """Check the sanity of iallocator and node arguments and use the
1173
  cluster-wide iallocator if appropriate.
1174

1175
  Check that at most one of (iallocator, node) is specified. If none is
1176
  specified, then the LU's opcode's iallocator slot is filled with the
1177
  cluster-wide default iallocator.
1178

1179
  @type iallocator_slot: string
1180
  @param iallocator_slot: the name of the opcode iallocator slot
1181
  @type node_slot: string
1182
  @param node_slot: the name of the opcode target node slot
1183

1184
  """
1185
  node = getattr(lu.op, node_slot, None)
1186
  iallocator = getattr(lu.op, iallocator_slot, None)
1187

    
1188
  if node is not None and iallocator is not None:
1189
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1190
                               errors.ECODE_INVAL)
1191
  elif node is None and iallocator is None:
1192
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1193
    if default_iallocator:
1194
      setattr(lu.op, iallocator_slot, default_iallocator)
1195
    else:
1196
      raise errors.OpPrereqError("No iallocator or node given and no"
1197
                                 " cluster-wide default iallocator found."
1198
                                 " Please specify either an iallocator or a"
1199
                                 " node, or set a cluster-wide default"
1200
                                 " iallocator.")
1201

    
1202

    
1203
class LUPostInitCluster(LogicalUnit):
1204
  """Logical unit for running hooks after cluster initialization.
1205

1206
  """
1207
  HPATH = "cluster-init"
1208
  HTYPE = constants.HTYPE_CLUSTER
1209

    
1210
  def BuildHooksEnv(self):
1211
    """Build hooks env.
1212

1213
    """
1214
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1215
    mn = self.cfg.GetMasterNode()
1216
    return env, [], [mn]
1217

    
1218
  def Exec(self, feedback_fn):
1219
    """Nothing to do.
1220

1221
    """
1222
    return True
1223

    
1224

    
1225
class LUDestroyCluster(LogicalUnit):
1226
  """Logical unit for destroying the cluster.
1227

1228
  """
1229
  HPATH = "cluster-destroy"
1230
  HTYPE = constants.HTYPE_CLUSTER
1231

    
1232
  def BuildHooksEnv(self):
1233
    """Build hooks env.
1234

1235
    """
1236
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1237
    return env, [], []
1238

    
1239
  def CheckPrereq(self):
1240
    """Check prerequisites.
1241

1242
    This checks whether the cluster is empty.
1243

1244
    Any errors are signaled by raising errors.OpPrereqError.
1245

1246
    """
1247
    master = self.cfg.GetMasterNode()
1248

    
1249
    nodelist = self.cfg.GetNodeList()
1250
    if len(nodelist) != 1 or nodelist[0] != master:
1251
      raise errors.OpPrereqError("There are still %d node(s) in"
1252
                                 " this cluster." % (len(nodelist) - 1),
1253
                                 errors.ECODE_INVAL)
1254
    instancelist = self.cfg.GetInstanceList()
1255
    if instancelist:
1256
      raise errors.OpPrereqError("There are still %d instance(s) in"
1257
                                 " this cluster." % len(instancelist),
1258
                                 errors.ECODE_INVAL)
1259

    
1260
  def Exec(self, feedback_fn):
1261
    """Destroys the cluster.
1262

1263
    """
1264
    master = self.cfg.GetMasterNode()
1265
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1266

    
1267
    # Run post hooks on master node before it's removed
1268
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1269
    try:
1270
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1271
    except:
1272
      # pylint: disable-msg=W0702
1273
      self.LogWarning("Errors occurred running hooks on %s" % master)
1274

    
1275
    result = self.rpc.call_node_stop_master(master, False)
1276
    result.Raise("Could not disable the master role")
1277

    
1278
    if modify_ssh_setup:
1279
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280
      utils.CreateBackup(priv_key)
1281
      utils.CreateBackup(pub_key)
1282

    
1283
    return master
1284

    
1285

    
1286
def _VerifyCertificate(filename):
1287
  """Verifies a certificate for LUVerifyCluster.
1288

1289
  @type filename: string
1290
  @param filename: Path to PEM file
1291

1292
  """
1293
  try:
1294
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295
                                           utils.ReadFile(filename))
1296
  except Exception, err: # pylint: disable-msg=W0703
1297
    return (LUVerifyCluster.ETYPE_ERROR,
1298
            "Failed to load X509 certificate %s: %s" % (filename, err))
1299

    
1300
  (errcode, msg) = \
1301
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302
                                constants.SSL_CERT_EXPIRATION_ERROR)
1303

    
1304
  if msg:
1305
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1306
  else:
1307
    fnamemsg = None
1308

    
1309
  if errcode is None:
1310
    return (None, fnamemsg)
1311
  elif errcode == utils.CERT_WARNING:
1312
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313
  elif errcode == utils.CERT_ERROR:
1314
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1315

    
1316
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1317

    
1318

    
1319
class LUVerifyCluster(LogicalUnit):
1320
  """Verifies the cluster status.
1321

1322
  """
1323
  HPATH = "cluster-verify"
1324
  HTYPE = constants.HTYPE_CLUSTER
1325
  _OP_PARAMS = [
1326
    ("skip_checks", _EmptyList,
1327
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328
    ("verbose", False, _TBool),
1329
    ("error_codes", False, _TBool),
1330
    ("debug_simulate_errors", False, _TBool),
1331
    ]
1332
  REQ_BGL = False
1333

    
1334
  TCLUSTER = "cluster"
1335
  TNODE = "node"
1336
  TINSTANCE = "instance"
1337

    
1338
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346
  ENODEDRBD = (TNODE, "ENODEDRBD")
1347
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350
  ENODEHV = (TNODE, "ENODEHV")
1351
  ENODELVM = (TNODE, "ENODELVM")
1352
  ENODEN1 = (TNODE, "ENODEN1")
1353
  ENODENET = (TNODE, "ENODENET")
1354
  ENODEOS = (TNODE, "ENODEOS")
1355
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357
  ENODERPC = (TNODE, "ENODERPC")
1358
  ENODESSH = (TNODE, "ENODESSH")
1359
  ENODEVERSION = (TNODE, "ENODEVERSION")
1360
  ENODESETUP = (TNODE, "ENODESETUP")
1361
  ENODETIME = (TNODE, "ENODETIME")
1362

    
1363
  ETYPE_FIELD = "code"
1364
  ETYPE_ERROR = "ERROR"
1365
  ETYPE_WARNING = "WARNING"
1366

    
1367
  class NodeImage(object):
1368
    """A class representing the logical and physical status of a node.
1369

1370
    @type name: string
1371
    @ivar name: the node name to which this object refers
1372
    @ivar volumes: a structure as returned from
1373
        L{ganeti.backend.GetVolumeList} (runtime)
1374
    @ivar instances: a list of running instances (runtime)
1375
    @ivar pinst: list of configured primary instances (config)
1376
    @ivar sinst: list of configured secondary instances (config)
1377
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378
        of this node (config)
1379
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1380
    @ivar dfree: free disk, as reported by the node (runtime)
1381
    @ivar offline: the offline status (config)
1382
    @type rpc_fail: boolean
1383
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1384
        not whether the individual keys were correct) (runtime)
1385
    @type lvm_fail: boolean
1386
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387
    @type hyp_fail: boolean
1388
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1389
    @type ghost: boolean
1390
    @ivar ghost: whether this is a known node or not (config)
1391
    @type os_fail: boolean
1392
    @ivar os_fail: whether the RPC call didn't return valid OS data
1393
    @type oslist: list
1394
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1395

1396
    """
1397
    def __init__(self, offline=False, name=None):
1398
      self.name = name
1399
      self.volumes = {}
1400
      self.instances = []
1401
      self.pinst = []
1402
      self.sinst = []
1403
      self.sbp = {}
1404
      self.mfree = 0
1405
      self.dfree = 0
1406
      self.offline = offline
1407
      self.rpc_fail = False
1408
      self.lvm_fail = False
1409
      self.hyp_fail = False
1410
      self.ghost = False
1411
      self.os_fail = False
1412
      self.oslist = {}
1413

    
1414
  def ExpandNames(self):
1415
    self.needed_locks = {
1416
      locking.LEVEL_NODE: locking.ALL_SET,
1417
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1418
    }
1419
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1420

    
1421
  def _Error(self, ecode, item, msg, *args, **kwargs):
1422
    """Format an error message.
1423

1424
    Based on the opcode's error_codes parameter, either format a
1425
    parseable error code, or a simpler error string.
1426

1427
    This must be called only from Exec and functions called from Exec.
1428

1429
    """
1430
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1431
    itype, etxt = ecode
1432
    # first complete the msg
1433
    if args:
1434
      msg = msg % args
1435
    # then format the whole message
1436
    if self.op.error_codes:
1437
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1438
    else:
1439
      if item:
1440
        item = " " + item
1441
      else:
1442
        item = ""
1443
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444
    # and finally report it via the feedback_fn
1445
    self._feedback_fn("  - %s" % msg)
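  # Illustrative sketch (hypothetical values): with self.op.error_codes set,
  # _Error emits machine-parseable lines such as
  # "ERROR:ENODENET:node:node1.example.com:cannot reach the master IP",
  # otherwise human-oriented lines such as
  # "ERROR: node node1.example.com: cannot reach the master IP".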
1446

    
1447
  def _ErrorIf(self, cond, *args, **kwargs):
1448
    """Log an error message if the passed condition is True.
1449

1450
    """
1451
    cond = bool(cond) or self.op.debug_simulate_errors
1452
    if cond:
1453
      self._Error(*args, **kwargs)
1454
    # only mark the operation as failed for ERROR-level results, not warnings
1455
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456
      self.bad = self.bad or cond
1457

    
1458
  def _VerifyNode(self, ninfo, nresult):
1459
    """Perform some basic validation on data returned from a node.
1460

1461
      - check the result data structure is well formed and has all the
1462
        mandatory fields
1463
      - check ganeti version
1464

1465
    @type ninfo: L{objects.Node}
1466
    @param ninfo: the node to check
1467
    @param nresult: the results from the node
1468
    @rtype: boolean
1469
    @return: whether overall this call was successful (and we can expect
1470
         reasonable values in the response)
1471

1472
    """
1473
    node = ninfo.name
1474
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1475

    
1476
    # main result, nresult should be a non-empty dict
1477
    test = not nresult or not isinstance(nresult, dict)
1478
    _ErrorIf(test, self.ENODERPC, node,
1479
                  "unable to verify node: no data returned")
1480
    if test:
1481
      return False
1482

    
1483
    # compares ganeti version
1484
    local_version = constants.PROTOCOL_VERSION
1485
    remote_version = nresult.get("version", None)
1486
    test = not (remote_version and
1487
                isinstance(remote_version, (list, tuple)) and
1488
                len(remote_version) == 2)
1489
    _ErrorIf(test, self.ENODERPC, node,
1490
             "connection to node returned invalid data")
1491
    if test:
1492
      return False
1493

    
1494
    test = local_version != remote_version[0]
1495
    _ErrorIf(test, self.ENODEVERSION, node,
1496
             "incompatible protocol versions: master %s,"
1497
             " node %s", local_version, remote_version[0])
1498
    if test:
1499
      return False
1500

    
1501
    # node seems compatible, we can actually try to look into its results
1502

    
1503
    # full package version
1504
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505
                  self.ENODEVERSION, node,
1506
                  "software version mismatch: master %s, node %s",
1507
                  constants.RELEASE_VERSION, remote_version[1],
1508
                  code=self.ETYPE_WARNING)
1509

    
1510
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511
    if isinstance(hyp_result, dict):
1512
      for hv_name, hv_result in hyp_result.iteritems():
1513
        test = hv_result is not None
1514
        _ErrorIf(test, self.ENODEHV, node,
1515
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1516

    
1517

    
1518
    test = nresult.get(constants.NV_NODESETUP,
1519
                           ["Missing NODESETUP results"])
1520
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1521
             "; ".join(test))
1522

    
1523
    return True
1524

    
1525
  def _VerifyNodeTime(self, ninfo, nresult,
1526
                      nvinfo_starttime, nvinfo_endtime):
1527
    """Check the node time.
1528

1529
    @type ninfo: L{objects.Node}
1530
    @param ninfo: the node to check
1531
    @param nresult: the remote results for the node
1532
    @param nvinfo_starttime: the start time of the RPC call
1533
    @param nvinfo_endtime: the end time of the RPC call
1534

1535
    """
1536
    node = ninfo.name
1537
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1538

    
1539
    ntime = nresult.get(constants.NV_TIME, None)
1540
    try:
1541
      ntime_merged = utils.MergeTime(ntime)
1542
    except (ValueError, TypeError):
1543
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1544
      return
1545

    
1546
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1550
    else:
1551
      ntime_diff = None
1552

    
1553
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554
             "Node time diverges by at least %s from master node time",
1555
             ntime_diff)
1556

    
1557
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558
    """Check the node time.
1559

1560
    @type ninfo: L{objects.Node}
1561
    @param ninfo: the node to check
1562
    @param nresult: the remote results for the node
1563
    @param vg_name: the configured VG name
1564

1565
    """
1566
    if vg_name is None:
1567
      return
1568

    
1569
    node = ninfo.name
1570
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1571

    
1572
    # checks vg existence and size > 20G
1573
    vglist = nresult.get(constants.NV_VGLIST, None)
1574
    test = not vglist
1575
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1576
    if not test:
1577
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578
                                            constants.MIN_VG_SIZE)
1579
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1580

    
1581
    # check pv names
1582
    pvlist = nresult.get(constants.NV_PVLIST, None)
1583
    test = pvlist is None
1584
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1585
    if not test:
1586
      # check that ':' is not present in PV names, since it's a
1587
      # special character for lvcreate (denotes the range of PEs to
1588
      # use on the PV)
1589
      for _, pvname, owner_vg in pvlist:
1590
        test = ":" in pvname
1591
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592
                 " '%s' of VG '%s'", pvname, owner_vg)
1593

    
1594
  def _VerifyNodeNetwork(self, ninfo, nresult):
1595
    """Check the node time.
1596

1597
    @type ninfo: L{objects.Node}
1598
    @param ninfo: the node to check
1599
    @param nresult: the remote results for the node
1600

1601
    """
1602
    node = ninfo.name
1603
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1604

    
1605
    test = constants.NV_NODELIST not in nresult
1606
    _ErrorIf(test, self.ENODESSH, node,
1607
             "node hasn't returned node ssh connectivity data")
1608
    if not test:
1609
      if nresult[constants.NV_NODELIST]:
1610
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611
          _ErrorIf(True, self.ENODESSH, node,
1612
                   "ssh communication with node '%s': %s", a_node, a_msg)
1613

    
1614
    test = constants.NV_NODENETTEST not in nresult
1615
    _ErrorIf(test, self.ENODENET, node,
1616
             "node hasn't returned node tcp connectivity data")
1617
    if not test:
1618
      if nresult[constants.NV_NODENETTEST]:
1619
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1620
        for anode in nlist:
1621
          _ErrorIf(True, self.ENODENET, node,
1622
                   "tcp communication with node '%s': %s",
1623
                   anode, nresult[constants.NV_NODENETTEST][anode])
1624

    
1625
    test = constants.NV_MASTERIP not in nresult
1626
    _ErrorIf(test, self.ENODENET, node,
1627
             "node hasn't returned node master IP reachability data")
1628
    if not test:
1629
      if not nresult[constants.NV_MASTERIP]:
1630
        if node == self.master_node:
1631
          msg = "the master node cannot reach the master IP (not configured?)"
1632
        else:
1633
          msg = "cannot reach the master IP"
1634
        _ErrorIf(True, self.ENODENET, node, msg)
1635

    
1636

    
1637
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1638
    """Verify an instance.
1639

1640
    This function checks to see if the required block devices are
1641
    available on the instance's node.
1642

1643
    """
1644
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645
    node_current = instanceconfig.primary_node
1646

    
1647
    node_vol_should = {}
1648
    instanceconfig.MapLVsByNode(node_vol_should)
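    # node_vol_should: node name -> list of logical volume names this
    # instance should have on that node, as derived from the configuration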
1649

    
1650
    for node in node_vol_should:
1651
      n_img = node_image[node]
1652
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653
        # ignore missing volumes on offline or broken nodes
1654
        continue
1655
      for volume in node_vol_should[node]:
1656
        test = volume not in n_img.volumes
1657
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658
                 "volume %s missing on node %s", volume, node)
1659

    
1660
    if instanceconfig.admin_up:
1661
      pri_img = node_image[node_current]
1662
      test = instance not in pri_img.instances and not pri_img.offline
1663
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664
               "instance not running on its primary node %s",
1665
               node_current)
1666

    
1667
    for node, n_img in node_image.items():
1668
      if node != node_current:
1669
        test = instance in n_img.instances
1670
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671
                 "instance should not run on node %s", node)
1672

    
1673
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674
    """Verify if there are any unknown volumes in the cluster.
1675

1676
    The .os, .swap and backup volumes are ignored. All other volumes are
1677
    reported as unknown.
1678

1679
    @type reserved: L{ganeti.utils.FieldSet}
1680
    @param reserved: a FieldSet of reserved volume names
1681

1682
    """
1683
    for node, n_img in node_image.items():
1684
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685
        # skip non-healthy nodes
1686
        continue
1687
      for volume in n_img.volumes:
1688
        test = ((node not in node_vol_should or
1689
                volume not in node_vol_should[node]) and
1690
                not reserved.Matches(volume))
1691
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1692
                      "volume %s is unknown", volume)
1693

    
1694
  def _VerifyOrphanInstances(self, instancelist, node_image):
1695
    """Verify the list of running instances.
1696

1697
    This checks what instances are running but unknown to the cluster.
1698

1699
    """
1700
    for node, n_img in node_image.items():
1701
      for o_inst in n_img.instances:
1702
        test = o_inst not in instancelist
1703
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704
                      "instance %s on node %s should not exist", o_inst, node)
1705

    
1706
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707
    """Verify N+1 Memory Resilience.
1708

1709
    Check that if one single node dies we can still start all the
1710
    instances it was primary for.
1711

1712
    """
1713
    for node, n_img in node_image.items():
1714
      # This code checks that every node which is now listed as
1715
      # secondary has enough memory to host all instances it is
1716
      # supposed to host, should a single other node in the cluster fail.
1717
      # FIXME: not ready for failover to an arbitrary node
1718
      # FIXME: does not support file-backed instances
1719
      # WARNING: we currently take into account down instances as well
1720
      # as up ones, considering that even if they're down someone
1721
      # might want to start them even in the event of a node failure.
1722
      for prinode, instances in n_img.sbp.items():
1723
        needed_mem = 0
1724
        for instance in instances:
1725
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726
          if bep[constants.BE_AUTO_BALANCE]:
1727
            needed_mem += bep[constants.BE_MEMORY]
1728
        test = n_img.mfree < needed_mem
1729
        self._ErrorIf(test, self.ENODEN1, node,
1730
                      "not enough memory on to accommodate"
1731
                      " failovers should peer node %s fail", prinode)
1732

    
1733
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1734
                       master_files):
1735
    """Verifies and computes the node required file checksums.
1736

1737
    @type ninfo: L{objects.Node}
1738
    @param ninfo: the node to check
1739
    @param nresult: the remote results for the node
1740
    @param file_list: required list of files
1741
    @param local_cksum: dictionary of local files and their checksums
1742
    @param master_files: list of files that only masters should have
1743

1744
    """
1745
    node = ninfo.name
1746
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1747

    
1748
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749
    test = not isinstance(remote_cksum, dict)
1750
    _ErrorIf(test, self.ENODEFILECHECK, node,
1751
             "node hasn't returned file checksum data")
1752
    if test:
1753
      return
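    # each file can be missing (test1), present with a wrong checksum
    # (test2) or present with a correct checksum (test3); whether each case
    # is an error depends on must_have, i.e. on whether this node is
    # supposed to hold the file at all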
1754

    
1755
    for file_name in file_list:
1756
      node_is_mc = ninfo.master_candidate
1757
      must_have = (file_name not in master_files) or node_is_mc
1758
      # missing
1759
      test1 = file_name not in remote_cksum
1760
      # invalid checksum
1761
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1762
      # existing and good
1763
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765
               "file '%s' missing", file_name)
1766
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767
               "file '%s' has wrong checksum", file_name)
1768
      # not candidate and this is not a must-have file
1769
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770
               "file '%s' should not exist on non master"
1771
               " candidates (and the file is outdated)", file_name)
1772
      # all good, except non-master/non-must have combination
1773
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774
               "file '%s' should not exist"
1775
               " on non master candidates", file_name)
1776

    
1777
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1778
                      drbd_map):
1779
    """Verifies and the node DRBD status.
1780

1781
    @type ninfo: L{objects.Node}
1782
    @param ninfo: the node to check
1783
    @param nresult: the remote results for the node
1784
    @param instanceinfo: the dict of instances
1785
    @param drbd_helper: the configured DRBD usermode helper
1786
    @param drbd_map: the DRBD map as returned by
1787
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1788

1789
    """
1790
    node = ninfo.name
1791
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1792

    
1793
    if drbd_helper:
1794
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795
      test = (helper_result is None)
1796
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797
               "no drbd usermode helper returned")
1798
      if helper_result:
1799
        status, payload = helper_result
1800
        test = not status
1801
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802
                 "drbd usermode helper check unsuccessful: %s", payload)
1803
        test = status and (payload != drbd_helper)
1804
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805
                 "wrong drbd usermode helper: %s", payload)
1806

    
1807
    # compute the DRBD minors
1808
    node_drbd = {}
1809
    for minor, instance in drbd_map[node].items():
1810
      test = instance not in instanceinfo
1811
      _ErrorIf(test, self.ECLUSTERCFG, None,
1812
               "ghost instance '%s' in temporary DRBD map", instance)
1813
      # ghost instance should not be running, but otherwise we
1814
      # don't give double warnings (both ghost instance and
1815
      # unallocated minor in use)
1816
      if test:
1817
        node_drbd[minor] = (instance, False)
1818
      else:
1819
        instance = instanceinfo[instance]
1820
        node_drbd[minor] = (instance.name, instance.admin_up)
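    # node_drbd now maps each minor expected on this node to a
    # (instance name, should-be-active) pair, e.g.
    # {0: ("instance1.example.com", True), 1: ("ghost-instance", False)}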
1821

    
1822
    # and now check them
1823
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824
    test = not isinstance(used_minors, (tuple, list))
1825
    _ErrorIf(test, self.ENODEDRBD, node,
1826
             "cannot parse drbd status file: %s", str(used_minors))
1827
    if test:
1828
      # we cannot check drbd status
1829
      return
1830

    
1831
    for minor, (iname, must_exist) in node_drbd.items():
1832
      test = minor not in used_minors and must_exist
1833
      _ErrorIf(test, self.ENODEDRBD, node,
1834
               "drbd minor %d of instance %s is not active", minor, iname)
1835
    for minor in used_minors:
1836
      test = minor not in node_drbd
1837
      _ErrorIf(test, self.ENODEDRBD, node,
1838
               "unallocated drbd minor %d is in use", minor)
1839

    
1840
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841
    """Builds the node OS structures.
1842

1843
    @type ninfo: L{objects.Node}
1844
    @param ninfo: the node to check
1845
    @param nresult: the remote results for the node
1846
    @param nimg: the node image object
1847

1848
    """
1849
    node = ninfo.name
1850
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1851

    
1852
    remote_os = nresult.get(constants.NV_OSLIST, None)
1853
    test = (not isinstance(remote_os, list) or
1854
            not compat.all(isinstance(v, list) and len(v) == 7
1855
                           for v in remote_os))
1856

    
1857
    _ErrorIf(test, self.ENODEOS, node,
1858
             "node hasn't returned valid OS data")
1859

    
1860
    nimg.os_fail = test
1861

    
1862
    if test:
1863
      return
1864

    
1865
    os_dict = {}
1866

    
1867
    for (name, os_path, status, diagnose,
1868
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1869

    
1870
      if name not in os_dict:
1871
        os_dict[name] = []
1872

    
1873
      # parameters is a list of lists instead of list of tuples due to
1874
      # JSON lacking a real tuple type, fix it:
1875
      parameters = [tuple(v) for v in parameters]
1876
      os_dict[name].append((os_path, status, diagnose,
1877
                            set(variants), set(parameters), set(api_ver)))
1878

    
1879
    nimg.oslist = os_dict
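    # nimg.oslist: OS name -> list of (path, status, diagnose, variants,
    # parameters, api_versions) tuples, one entry per directory in which
    # the OS was found on this node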
1880

    
1881
  def _VerifyNodeOS(self, ninfo, nimg, base):
1882
    """Verifies the node OS list.
1883

1884
    @type ninfo: L{objects.Node}
1885
    @param ninfo: the node to check
1886
    @param nimg: the node image object
1887
    @param base: the 'template' node we match against (e.g. from the master)
1888

1889
    """
1890
    node = ninfo.name
1891
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1892

    
1893
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1894

    
1895
    for os_name, os_data in nimg.oslist.items():
1896
      assert os_data, "Empty OS status for OS %s?!" % os_name
1897
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898
      _ErrorIf(not f_status, self.ENODEOS, node,
1899
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1902
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1903
      # this will be caught in the backend too
1904
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905
               and not f_var, self.ENODEOS, node,
1906
               "OS %s with API at least %d does not declare any variant",
1907
               os_name, constants.OS_API_V15)
1908
      # comparisons with the 'base' image
1909
      test = os_name not in base.oslist
1910
      _ErrorIf(test, self.ENODEOS, node,
1911
               "Extra OS %s not present on reference node (%s)",
1912
               os_name, base.name)
1913
      if test:
1914
        continue
1915
      assert base.oslist[os_name], "Base node has empty OS status?"
1916
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1917
      if not b_status:
1918
        # base OS is invalid, skipping
1919
        continue
1920
      for kind, a, b in [("API version", f_api, b_api),
1921
                         ("variants list", f_var, b_var),
1922
                         ("parameters", f_param, b_param)]:
1923
        _ErrorIf(a != b, self.ENODEOS, node,
1924
                 "OS %s %s differs from reference node %s: %s vs. %s",
1925
                 kind, os_name, base.name,
1926
                 utils.CommaJoin(a), utils.CommaJoin(b))
1927

    
1928
    # check any missing OSes
1929
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930
    _ErrorIf(missing, self.ENODEOS, node,
1931
             "OSes present on reference node %s but missing on this node: %s",
1932
             base.name, utils.CommaJoin(missing))
1933

    
1934
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935
    """Verifies and updates the node volume data.
1936

1937
    This function will update a L{NodeImage}'s internal structures
1938
    with data from the remote call.
1939

1940
    @type ninfo: L{objects.Node}
1941
    @param ninfo: the node to check
1942
    @param nresult: the remote results for the node
1943
    @param nimg: the node image object
1944
    @param vg_name: the configured VG name
1945

1946
    """
1947
    node = ninfo.name
1948
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1949

    
1950
    nimg.lvm_fail = True
1951
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1952
    if vg_name is None:
1953
      pass
1954
    elif isinstance(lvdata, basestring):
1955
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956
               utils.SafeEncode(lvdata))
1957
    elif not isinstance(lvdata, dict):
1958
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1959
    else:
1960
      nimg.volumes = lvdata
1961
      nimg.lvm_fail = False
1962

    
1963
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964
    """Verifies and updates the node instance list.
1965

1966
    If the listing was successful, then updates this node's instance
1967
    list. Otherwise, it marks the RPC call as failed for the instance
1968
    list key.
1969

1970
    @type ninfo: L{objects.Node}
1971
    @param ninfo: the node to check
1972
    @param nresult: the remote results for the node
1973
    @param nimg: the node image object
1974

1975
    """
1976
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1977
    test = not isinstance(idata, list)
1978
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1980
    if test:
1981
      nimg.hyp_fail = True
1982
    else:
1983
      nimg.instances = idata
1984

    
1985
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986
    """Verifies and computes a node information map
1987

1988
    @type ninfo: L{objects.Node}
1989
    @param ninfo: the node to check
1990
    @param nresult: the remote results for the node
1991
    @param nimg: the node image object
1992
    @param vg_name: the configured VG name
1993

1994
    """
1995
    node = ninfo.name
1996
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1997

    
1998
    # try to read free memory (from the hypervisor)
1999
    hv_info = nresult.get(constants.NV_HVINFO, None)
2000
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2002
    if not test:
2003
      try:
2004
        nimg.mfree = int(hv_info["memory_free"])
2005
      except (ValueError, TypeError):
2006
        _ErrorIf(True, self.ENODERPC, node,
2007
                 "node returned invalid nodeinfo, check hypervisor")
2008

    
2009
    # FIXME: devise a free space model for file based instances as well
2010
    if vg_name is not None:
2011
      test = (constants.NV_VGLIST not in nresult or
2012
              vg_name not in nresult[constants.NV_VGLIST])
2013
      _ErrorIf(test, self.ENODELVM, node,
2014
               "node didn't return data for the volume group '%s'"
2015
               " - it is either missing or broken", vg_name)
2016
      if not test:
2017
        try:
2018
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019
        except (ValueError, TypeError):
2020
          _ErrorIf(True, self.ENODERPC, node,
2021
                   "node returned invalid LVM info, check LVM status")
2022

    
2023
  def BuildHooksEnv(self):
2024
    """Build hooks env.
2025

2026
    Cluster-Verify hooks are run only in the post phase; if they fail, their
2027
    output is logged in the verify output and the verification fails.
2028

2029
    """
2030
    all_nodes = self.cfg.GetNodeList()
2031
    env = {
2032
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2033
      }
2034
    for node in self.cfg.GetAllNodesInfo().values():
2035
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2036

    
2037
    return env, [], all_nodes
2038

    
2039
  def Exec(self, feedback_fn):
2040
    """Verify integrity of cluster, performing various test on nodes.
2041

2042
    """
2043
    self.bad = False
2044
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045
    verbose = self.op.verbose
2046
    self._feedback_fn = feedback_fn
2047
    feedback_fn("* Verifying global settings")
2048
    for msg in self.cfg.VerifyConfig():
2049
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2050

    
2051
    # Check the cluster certificates
2052
    for cert_filename in constants.ALL_CERT_FILES:
2053
      (errcode, msg) = _VerifyCertificate(cert_filename)
2054
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2055

    
2056
    vg_name = self.cfg.GetVGName()
2057
    drbd_helper = self.cfg.GetDRBDHelper()
2058
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059
    cluster = self.cfg.GetClusterInfo()
2060
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064
                        for iname in instancelist)
2065
    i_non_redundant = [] # Non redundant instances
2066
    i_non_a_balanced = [] # Non auto-balanced instances
2067
    n_offline = 0 # Count of offline nodes
2068
    n_drained = 0 # Count of nodes being drained
2069
    node_vol_should = {}
2070

    
2071
    # FIXME: verify OS list
2072
    # do local checksums
2073
    master_files = [constants.CLUSTER_CONF_FILE]
2074
    master_node = self.master_node = self.cfg.GetMasterNode()
2075
    master_ip = self.cfg.GetMasterIP()
2076

    
2077
    file_names = ssconf.SimpleStore().GetFileList()
2078
    file_names.extend(constants.ALL_CERT_FILES)
2079
    file_names.extend(master_files)
2080
    if cluster.modify_etc_hosts:
2081
      file_names.append(constants.ETC_HOSTS)
2082

    
2083
    local_checksums = utils.FingerprintFiles(file_names)
2084

    
2085
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086
    node_verify_param = {
2087
      constants.NV_FILELIST: file_names,
2088
      constants.NV_NODELIST: [node.name for node in nodeinfo
2089
                              if not node.offline],
2090
      constants.NV_HYPERVISOR: hypervisors,
2091
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092
                                  node.secondary_ip) for node in nodeinfo
2093
                                 if not node.offline],
2094
      constants.NV_INSTANCELIST: hypervisors,
2095
      constants.NV_VERSION: None,
2096
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097
      constants.NV_NODESETUP: None,
2098
      constants.NV_TIME: None,
2099
      constants.NV_MASTERIP: (master_node, master_ip),
2100
      constants.NV_OSLIST: None,
2101
      }
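    # each NV_* key requests one specific check from the node; the value
    # carries the parameters for that check (e.g. the files to fingerprint)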
2102

    
2103
    if vg_name is not None:
2104
      node_verify_param[constants.NV_VGLIST] = None
2105
      node_verify_param[constants.NV_LVLIST] = vg_name
2106
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2107
      node_verify_param[constants.NV_DRBDLIST] = None
2108

    
2109
    if drbd_helper:
2110
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2111

    
2112
    # Build our expected cluster state
2113
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2114
                                                 name=node.name))
2115
                      for node in nodeinfo)
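    # node_image: node name -> NodeImage, pre-filled with the static
    # (configuration-derived) state; runtime data is added further down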
2116

    
2117
    for instance in instancelist:
2118
      inst_config = instanceinfo[instance]
2119

    
2120
      for nname in inst_config.all_nodes:
2121
        if nname not in node_image:
2122
          # ghost node
2123
          gnode = self.NodeImage(name=nname)
2124
          gnode.ghost = True
2125
          node_image[nname] = gnode
2126

    
2127
      inst_config.MapLVsByNode(node_vol_should)
2128

    
2129
      pnode = inst_config.primary_node
2130
      node_image[pnode].pinst.append(instance)
2131

    
2132
      for snode in inst_config.secondary_nodes:
2133
        nimg = node_image[snode]
2134
        nimg.sinst.append(instance)
2135
        if pnode not in nimg.sbp:
2136
          nimg.sbp[pnode] = []
2137
        nimg.sbp[pnode].append(instance)
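      # nimg.sbp ("secondary by primary"): for this secondary node, maps
      # each primary node name to the instances that would need to fail
      # over here should that primary go down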
2138

    
2139
    # At this point, we have the in-memory data structures complete,
2140
    # except for the runtime information, which we'll gather next
2141

    
2142
    # Due to the way our RPC system works, exact response times cannot be
2143
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144
    # time before and after executing the request, we can at least have a time
2145
    # window.
2146
    nvinfo_starttime = time.time()
2147
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148
                                           self.cfg.GetClusterName())
2149
    nvinfo_endtime = time.time()
2150

    
2151
    all_drbd_map = self.cfg.ComputeDRBDMap()
2152

    
2153
    feedback_fn("* Verifying node status")
2154

    
2155
    refos_img = None
2156

    
2157
    for node_i in nodeinfo:
2158
      node = node_i.name
2159
      nimg = node_image[node]
2160

    
2161
      if node_i.offline:
2162
        if verbose:
2163
          feedback_fn("* Skipping offline node %s" % (node,))
2164
        n_offline += 1
2165
        continue
2166

    
2167
      if node == master_node:
2168
        ntype = "master"
2169
      elif node_i.master_candidate:
2170
        ntype = "master candidate"
2171
      elif node_i.drained:
2172
        ntype = "drained"
2173
        n_drained += 1
2174
      else:
2175
        ntype = "regular"
2176
      if verbose:
2177
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2178

    
2179
      msg = all_nvinfo[node].fail_msg
2180
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2181
      if msg:
2182
        nimg.rpc_fail = True
2183
        continue
2184

    
2185
      nresult = all_nvinfo[node].payload
2186

    
2187
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2188
      self._VerifyNodeNetwork(node_i, nresult)
2189
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2190
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2191
                            master_files)
2192
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2193
                           all_drbd_map)
2194
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2195

    
2196
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197
      self._UpdateNodeInstances(node_i, nresult, nimg)
2198
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199
      self._UpdateNodeOS(node_i, nresult, nimg)
2200
      if not nimg.os_fail:
2201
        if refos_img is None:
2202
          refos_img = nimg
2203
        self._VerifyNodeOS(node_i, nimg, refos_img)
2204

    
2205
    feedback_fn("* Verifying instance status")
2206
    for instance in instancelist:
2207
      if verbose:
2208
        feedback_fn("* Verifying instance %s" % instance)
2209
      inst_config = instanceinfo[instance]
2210
      self._VerifyInstance(instance, inst_config, node_image)
2211
      inst_nodes_offline = []
2212

    
2213
      pnode = inst_config.primary_node
2214
      pnode_img = node_image[pnode]
2215
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216
               self.ENODERPC, pnode, "instance %s, connection to"
2217
               " primary node failed", instance)
2218

    
2219
      if pnode_img.offline:
2220
        inst_nodes_offline.append(pnode)
2221

    
2222
      # If the instance is non-redundant we cannot survive losing its primary
2223
      # node, so we are not N+1 compliant. On the other hand we have no disk
2224
      # templates with more than one secondary so that situation is not well
2225
      # supported either.
2226
      # FIXME: does not support file-backed instances
2227
      if not inst_config.secondary_nodes:
2228
        i_non_redundant.append(instance)
2229
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230
               instance, "instance has multiple secondary nodes: %s",
2231
               utils.CommaJoin(inst_config.secondary_nodes),
2232
               code=self.ETYPE_WARNING)
2233

    
2234
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235
        i_non_a_balanced.append(instance)
2236

    
2237
      for snode in inst_config.secondary_nodes:
2238
        s_img = node_image[snode]
2239
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240
                 "instance %s, connection to secondary node failed", instance)
2241

    
2242
        if s_img.offline:
2243
          inst_nodes_offline.append(snode)
2244

    
2245
      # warn that the instance lives on offline nodes
2246
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247
               "instance lives on offline node(s) %s",
2248
               utils.CommaJoin(inst_nodes_offline))
2249
      # ... or ghost nodes
2250
      for node in inst_config.all_nodes:
2251
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252
                 "instance lives on ghost node %s", node)
2253

    
2254
    feedback_fn("* Verifying orphan volumes")
2255
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2256
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2257

    
2258
    feedback_fn("* Verifying orphan instances")
2259
    self._VerifyOrphanInstances(instancelist, node_image)
2260

    
2261
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262
      feedback_fn("* Verifying N+1 Memory redundancy")
2263
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2264

    
2265
    feedback_fn("* Other Notes")
2266
    if i_non_redundant:
2267
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2268
                  % len(i_non_redundant))
2269

    
2270
    if i_non_a_balanced:
2271
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2272
                  % len(i_non_a_balanced))
2273

    
2274
    if n_offline:
2275
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2276

    
2277
    if n_drained:
2278
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2279

    
2280
    return not self.bad
2281

    
2282
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283
    """Analyze the post-hooks' result
2284

2285
    This method analyses the hook result, handles it, and sends some
2286
    nicely-formatted feedback back to the user.
2287

2288
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290
    @param hooks_results: the results of the multi-node hooks rpc call
2291
    @param feedback_fn: function used to send feedback back to the caller
2292
    @param lu_result: previous Exec result
2293
    @return: the new Exec result, based on the previous result
2294
        and hook results
2295

2296
    """
2297
    # We only really run POST phase hooks, and are only interested in
2298
    # their results
2299
    if phase == constants.HOOKS_PHASE_POST:
2300
      # Used to change hooks' output to proper indentation
2301
      indent_re = re.compile('^', re.M)
2302
      feedback_fn("* Hooks Results")
2303
      assert hooks_results, "invalid result from hooks"
2304

    
2305
      for node_name in hooks_results:
2306
        res = hooks_results[node_name]
2307
        msg = res.fail_msg
2308
        test = msg and not res.offline
2309
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310
                      "Communication failure in hooks execution: %s", msg)
2311
        if res.offline or msg:
2312
          # No need to investigate payload if node is offline or gave an error.
2313
          # override manually lu_result here as _ErrorIf only
2314
          # overrides self.bad
2315
          lu_result = 1
2316
          continue
2317
        for script, hkr, output in res.payload:
2318
          test = hkr == constants.HKR_FAIL
2319
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320
                        "Script %s failed, output:", script)
2321
          if test:
2322
            output = indent_re.sub('      ', output)
2323
            feedback_fn("%s" % output)
2324
            lu_result = 0
2325

    
2326
      return lu_result
2327

    
2328

    
2329
class LUVerifyDisks(NoHooksLU):
2330
  """Verifies the cluster disks status.
2331

2332
  """
2333
  REQ_BGL = False
2334

    
2335
  def ExpandNames(self):
2336
    self.needed_locks = {
2337
      locking.LEVEL_NODE: locking.ALL_SET,
2338
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2339
    }
2340
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2341

    
2342
  def Exec(self, feedback_fn):
2343
    """Verify integrity of cluster disks.
2344

2345
    @rtype: tuple of three items
2346
    @return: a tuple of (dict of node-to-node_error, list of instances
2347
        which need activate-disks, dict of instance: (node, volume) for
2348
        missing volumes)
2349

2350
    """
2351
    result = res_nodes, res_instances, res_missing = {}, [], {}
2352

    
2353
    vg_name = self.cfg.GetVGName()
2354
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2355
    instances = [self.cfg.GetInstanceInfo(name)
2356
                 for name in self.cfg.GetInstanceList()]
2357

    
2358
    nv_dict = {}
2359
    for inst in instances:
2360
      inst_lvs = {}
2361
      if (not inst.admin_up or
2362
          inst.disk_template not in constants.DTS_NET_MIRROR):
2363
        continue
2364
      inst.MapLVsByNode(inst_lvs)
2365
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366
      for node, vol_list in inst_lvs.iteritems():
2367
        for vol in vol_list:
2368
          nv_dict[(node, vol)] = inst
2369

    
2370
    if not nv_dict:
2371
      return result
2372

    
2373
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2374

    
2375
    for node in nodes:
2376
      # node_volume
2377
      node_res = node_lvs[node]
2378
      if node_res.offline:
2379
        continue
2380
      msg = node_res.fail_msg
2381
      if msg:
2382
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383
        res_nodes[node] = msg
2384
        continue
2385

    
2386
      lvs = node_res.payload
2387
      for lv_name, (_, _, lv_online) in lvs.items():
2388
        inst = nv_dict.pop((node, lv_name), None)
2389
        if (not lv_online and inst is not None
2390
            and inst.name not in res_instances):
2391
          res_instances.append(inst.name)
2392

    
2393
    # any leftover items in nv_dict are missing LVs, let's arrange the
2394
    # data better
2395
    for key, inst in nv_dict.iteritems():
2396
      if inst.name not in res_missing:
2397
        res_missing[inst.name] = []
2398
      res_missing[inst.name].append(key)
2399

    
2400
    return result
2401

    
2402

    
2403
class LURepairDiskSizes(NoHooksLU):
2404
  """Verifies the cluster disks sizes.
2405

2406
  """
2407
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2408
  REQ_BGL = False
2409

    
2410
  def ExpandNames(self):
2411
    if self.op.instances:
2412
      self.wanted_names = []
2413
      for name in self.op.instances:
2414
        full_name = _ExpandInstanceName(self.cfg, name)
2415
        self.wanted_names.append(full_name)
2416
      self.needed_locks = {
2417
        locking.LEVEL_NODE: [],
2418
        locking.LEVEL_INSTANCE: self.wanted_names,
2419
        }
2420
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2421
    else:
2422
      self.wanted_names = None
2423
      self.needed_locks = {
2424
        locking.LEVEL_NODE: locking.ALL_SET,
2425
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2426
        }
2427
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2428

    
2429
  def DeclareLocks(self, level):
2430
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431
      self._LockInstancesNodes(primary_only=True)
2432

    
2433
  def CheckPrereq(self):
2434
    """Check prerequisites.
2435

2436
    This only checks the optional instance list against the existing names.
2437

2438
    """
2439
    if self.wanted_names is None:
2440
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2441

    
2442
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443
                             in self.wanted_names]
2444

    
2445
  def _EnsureChildSizes(self, disk):
2446
    """Ensure children of the disk have the needed disk size.
2447

2448
    This is valid mainly for DRBD8 and fixes an issue where the
2449
    children have smaller disk size.
2450

2451
    @param disk: an L{ganeti.objects.Disk} object
2452

2453
    """
2454
    if disk.dev_type == constants.LD_DRBD8:
2455
      assert disk.children, "Empty children for DRBD8?"
2456
      fchild = disk.children[0]
2457
      mismatch = fchild.size < disk.size
2458
      if mismatch:
2459
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2460
                     fchild.size, disk.size)
2461
        fchild.size = disk.size
2462

    
2463
      # and we recurse on this child only, not on the metadev
2464
      return self._EnsureChildSizes(fchild) or mismatch
2465
    else:
2466
      return False
2467

    
2468
  def Exec(self, feedback_fn):
2469
    """Verify the size of cluster disks.
2470

2471
    """
2472
    # TODO: check child disks too
2473
    # TODO: check differences in size between primary/secondary nodes
2474
    per_node_disks = {}
2475
    for instance in self.wanted_instances:
2476
      pnode = instance.primary_node
2477
      if pnode not in per_node_disks:
2478
        per_node_disks[pnode] = []
2479
      for idx, disk in enumerate(instance.disks):
2480
        per_node_disks[pnode].append((instance, idx, disk))
2481

    
2482
    changed = []
2483
    for node, dskl in per_node_disks.items():
2484
      newl = [v[2].Copy() for v in dskl]
2485
      for dsk in newl:
2486
        self.cfg.SetDiskID(dsk, node)
2487
      result = self.rpc.call_blockdev_getsizes(node, newl)
2488
      if result.fail_msg:
2489
        self.LogWarning("Failure in blockdev_getsizes call to node"
2490
                        " %s, ignoring", node)
2491
        continue
2492
      if len(result.data) != len(dskl):
2493
        self.LogWarning("Invalid result from node %s, ignoring node results",
2494
                        node)
2495
        continue
2496
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2497
        if size is None:
2498
          self.LogWarning("Disk %d of instance %s did not return size"
2499
                          " information, ignoring", idx, instance.name)
2500
          continue
2501
        if not isinstance(size, (int, long)):
2502
          self.LogWarning("Disk %d of instance %s did not return valid"
2503
                          " size information, ignoring", idx, instance.name)
2504
          continue
2505
        size = size >> 20
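        # (the node reports sizes in bytes, while disk.size is kept in MiB)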
2506
        if size != disk.size:
2507
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2508
                       " correcting: recorded %d, actual %d", idx,
2509
                       instance.name, disk.size, size)
2510
          disk.size = size
2511
          self.cfg.Update(instance, feedback_fn)
2512
          changed.append((instance.name, idx, size))
2513
        if self._EnsureChildSizes(disk):
2514
          self.cfg.Update(instance, feedback_fn)
2515
          changed.append((instance.name, idx, disk.size))
2516
    return changed
2517

    
2518

    
2519
class LURenameCluster(LogicalUnit):
2520
  """Rename the cluster.
2521

2522
  """
2523
  HPATH = "cluster-rename"
2524
  HTYPE = constants.HTYPE_CLUSTER
2525
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2526

    
2527
  def BuildHooksEnv(self):
2528
    """Build hooks env.
2529

2530
    """
2531
    env = {
2532
      "OP_TARGET": self.cfg.GetClusterName(),
2533
      "NEW_NAME": self.op.name,
2534
      }
2535
    mn = self.cfg.GetMasterNode()
2536
    all_nodes = self.cfg.GetNodeList()
2537
    return env, [mn], all_nodes
2538

    
2539
  def CheckPrereq(self):
2540
    """Verify that the passed name is a valid one.
2541

2542
    """
2543
    hostname = netutils.GetHostInfo(self.op.name)
2544

    
2545
    new_name = hostname.name
2546
    self.ip = new_ip = hostname.ip
2547
    old_name = self.cfg.GetClusterName()
2548
    old_ip = self.cfg.GetMasterIP()
2549
    if new_name == old_name and new_ip == old_ip:
2550
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551
                                 " cluster has changed",
2552
                                 errors.ECODE_INVAL)
2553
    if new_ip != old_ip:
2554
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556
                                   " reachable on the network. Aborting." %
2557
                                   new_ip, errors.ECODE_NOTUNIQUE)
2558

    
2559
    self.op.name = new_name
2560

    
2561
  def Exec(self, feedback_fn):
2562
    """Rename the cluster.
2563

2564
    """
2565
    clustername = self.op.name
2566
    ip = self.ip
2567

    
2568
    # shutdown the master IP
2569
    master = self.cfg.GetMasterNode()
2570
    result = self.rpc.call_node_stop_master(master, False)
2571
    result.Raise("Could not disable the master role")
2572

    
2573
    try:
2574
      cluster = self.cfg.GetClusterInfo()
2575
      cluster.cluster_name = clustername
2576
      cluster.master_ip = ip
2577
      self.cfg.Update(cluster, feedback_fn)
2578

    
2579
      # update the known hosts file
2580
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581
      node_list = self.cfg.GetNodeList()
2582
      try:
2583
        node_list.remove(master)
2584
      except ValueError:
2585
        pass
2586
      result = self.rpc.call_upload_file(node_list,
2587
                                         constants.SSH_KNOWN_HOSTS_FILE)
2588
      for to_node, to_result in result.iteritems():
2589
        msg = to_result.fail_msg
2590
        if msg:
2591
          msg = ("Copy of file %s to node %s failed: %s" %
2592
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593
          self.proc.LogWarning(msg)
2594

    
2595
    finally:
2596
      result = self.rpc.call_node_start_master(master, False, False)
2597
      msg = result.fail_msg
2598
      if msg:
2599
        self.LogWarning("Could not re-enable the master role on"
2600
                        " the master, please restart manually: %s", msg)
2601

    
2602
    return clustername
2603

    
2604

    
2605
class LUSetClusterParams(LogicalUnit):
2606
  """Change the parameters of the cluster.
2607

2608
  """
2609
  HPATH = "cluster-modify"
2610
  HTYPE = constants.HTYPE_CLUSTER
2611
  _OP_PARAMS = [
2612
    ("vg_name", None, _TMaybeString),
2613
    ("enabled_hypervisors", None,
2614
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2617
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620
    ("uid_pool", None, _NoType),
2621
    ("add_uids", None, _NoType),
2622
    ("remove_uids", None, _NoType),
2623
    ("maintain_node_health", None, _TMaybeBool),
2624
    ("nicparams", None, _TOr(_TDict, _TNone)),
2625
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2626
    ("default_iallocator", None, _TMaybeString),
2627
    ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628
    ("hidden_oss", None, _TOr(_TListOf(\
2629
          _TAnd(_TList,
2630
                _TIsLength(2),
2631
                _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2632
          _TNone)),
2633
    ("blacklisted_oss", None, _TOr(_TListOf(\
2634
          _TAnd(_TList,
2635
                _TIsLength(2),
2636
                _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2637
          _TNone)),
2638
    ]
2639
  REQ_BGL = False
2640

    
2641
  def CheckArguments(self):
2642
    """Check parameters
2643

2644
    """
2645
    if self.op.uid_pool:
2646
      uidpool.CheckUidPool(self.op.uid_pool)
2647

    
2648
    if self.op.add_uids:
2649
      uidpool.CheckUidPool(self.op.add_uids)
2650

    
2651
    if self.op.remove_uids:
2652
      uidpool.CheckUidPool(self.op.remove_uids)
2653

    
2654
  def ExpandNames(self):
2655
    # FIXME: in the future maybe other cluster params won't require checking on
2656
    # all nodes to be modified.
2657
    self.needed_locks = {
2658
      locking.LEVEL_NODE: locking.ALL_SET,
2659
    }
2660
    self.share_locks[locking.LEVEL_NODE] = 1
2661

    
2662
  def BuildHooksEnv(self):
2663
    """Build hooks env.
2664

2665
    """
2666
    env = {
2667
      "OP_TARGET": self.cfg.GetClusterName(),
2668
      "NEW_VG_NAME": self.op.vg_name,
2669
      }
2670
    mn = self.cfg.GetMasterNode()
2671
    return env, [mn], [mn]
2672

    
2673
  def CheckPrereq(self):
2674
    """Check prerequisites.
2675

2676
    This checks whether the given params don't conflict and
2677
    if the given volume group is valid.
2678

2679
    """
2680
    if self.op.vg_name is not None and not self.op.vg_name:
2681
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683
                                   " instances exist", errors.ECODE_INVAL)
2684

    
2685
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2688
                                   " drbd-based instances exist",
2689
                                   errors.ECODE_INVAL)
2690

    
2691
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2692

    
2693
    # if vg_name not None, checks given volume group on all nodes
2694
    if self.op.vg_name:
2695
      vglist = self.rpc.call_vg_list(node_list)
2696
      for node in node_list:
2697
        msg = vglist[node].fail_msg
2698
        if msg:
2699
          # ignoring down node
2700
          self.LogWarning("Error while gathering data on node %s"
2701
                          " (ignoring node): %s", node, msg)
2702
          continue
2703
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2704
                                              self.op.vg_name,
2705
                                              constants.MIN_VG_SIZE)
2706
        if vgstatus:
2707
          raise errors.OpPrereqError("Error on node '%s': %s" %
2708
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2709

    
2710
    if self.op.drbd_helper:
2711
      # checks given drbd helper on all nodes
2712
      helpers = self.rpc.call_drbd_helper(node_list)
2713
      for node in node_list:
2714
        ninfo = self.cfg.GetNodeInfo(node)
2715
        if ninfo.offline:
2716
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2717
          continue
2718
        msg = helpers[node].fail_msg
2719
        if msg:
2720
          raise errors.OpPrereqError("Error checking drbd helper on node"
2721
                                     " '%s': %s" % (node, msg),
2722
                                     errors.ECODE_ENVIRON)
2723
        node_helper = helpers[node].payload
2724
        if node_helper != self.op.drbd_helper:
2725
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726
                                     (node, node_helper), errors.ECODE_ENVIRON)
2727

    
2728
    self.cluster = cluster = self.cfg.GetClusterInfo()
2729
    # validate params changes
2730
    if self.op.beparams:
2731
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2733

    
2734
    if self.op.nicparams:
2735
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2738
      nic_errors = []
2739

    
2740
      # check all instances for consistency
2741
      for instance in self.cfg.GetAllInstancesInfo().values():
2742
        for nic_idx, nic in enumerate(instance.nics):
2743
          params_copy = copy.deepcopy(nic.nicparams)
2744
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2745

    
2746
          # check parameter syntax
2747
          try:
2748
            objects.NIC.CheckParameterSyntax(params_filled)
2749
          except errors.ConfigurationError, err:
2750
            nic_errors.append("Instance %s, nic/%d: %s" %
2751
                              (instance.name, nic_idx, err))
2752

    
2753
          # if we're moving instances to routed, check that they have an ip
2754
          target_mode = params_filled[constants.NIC_MODE]
2755
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2757
                              (instance.name, nic_idx))
2758
      if nic_errors:
2759
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760
                                   "\n".join(nic_errors))
2761

    
2762
    # hypervisor list/parameters
2763
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
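    # start from a copy of the current cluster-level hypervisor parameters
    # and merge any requested per-hypervisor changes on top of them below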
2764
    if self.op.hvparams:
2765
      for hv_name, hv_dict in self.op.hvparams.items():
2766
        if hv_name not in self.new_hvparams:
2767
          self.new_hvparams[hv_name] = hv_dict
2768
        else:
2769
          self.new_hvparams[hv_name].update(hv_dict)
2770

    
2771
    # os hypervisor parameters
2772
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2773
    if self.op.os_hvp:
2774
      for os_name, hvs in self.op.os_hvp.items():
2775
        if os_name not in self.new_os_hvp:
2776
          self.new_os_hvp[os_name] = hvs
2777
        else:
2778
          for hv_name, hv_dict in hvs.items():
2779
            if hv_name not in self.new_os_hvp[os_name]:
2780
              self.new_os_hvp[os_name][hv_name] = hv_dict
2781
            else:
2782
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2783

    
2784
    # os parameters
2785
    self.new_osp = objects.FillDict(cluster.osparams, {})
2786
    if self.op.osparams:
2787
      for os_name, osp in self.op.osparams.items():
2788
        if os_name not in self.new_osp:
2789
          self.new_osp[os_name] = {}
2790

    
2791
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2792
                                                  use_none=True)
2793

    
2794
        if not self.new_osp[os_name]:
2795
          # we removed all parameters
2796
          del self.new_osp[os_name]
2797
        else:
2798
          # check the parameter validity (remote check)
2799
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800
                         os_name, self.new_osp[os_name])
2801

    
2802
    # changes to the hypervisor list
2803
    if self.op.enabled_hypervisors is not None:
2804
      self.hv_list = self.op.enabled_hypervisors
2805
      for hv in self.hv_list:
2806
        # if the hypervisor doesn't already exist in the cluster
2807
        # hvparams, we initialize it to empty, and then (in both
2808
        # cases) we make sure to fill the defaults, as we might not
2809
        # have a complete defaults list if the hypervisor wasn't
2810
        # enabled before
2811
        if hv not in new_hvp:
2812
          new_hvp[hv] = {}
2813
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2815
    else:
2816
      self.hv_list = cluster.enabled_hypervisors
2817

    
2818
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819
      # either the enabled list has changed, or the parameters have, validate
2820
      for hv_name, hv_params in self.new_hvparams.items():
2821
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822
            (self.op.enabled_hypervisors and
2823
             hv_name in self.op.enabled_hypervisors)):
2824
          # either this is a new hypervisor, or its parameters have changed
2825
          hv_class = hypervisor.GetHypervisor(hv_name)
2826
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827
          hv_class.CheckParameterSyntax(hv_params)
2828
          _CheckHVParams(self, node_list, hv_name, hv_params)
2829

    
2830
    if self.op.os_hvp:
2831
      # no need to check any newly-enabled hypervisors, since the
2832
      # defaults have already been checked in the above code-block
2833
      for os_name, os_hvp in self.new_os_hvp.items():
2834
        for hv_name, hv_params in os_hvp.items():
2835
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836
          # we need to fill in the new os_hvp on top of the actual hv_p
2837
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2838
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2839
          hv_class = hypervisor.GetHypervisor(hv_name)
2840
          hv_class.CheckParameterSyntax(new_osp)
2841
          _CheckHVParams(self, node_list, hv_name, new_osp)
2842

    
2843
    if self.op.default_iallocator:
2844
      alloc_script = utils.FindFile(self.op.default_iallocator,
2845
                                    constants.IALLOCATOR_SEARCH_PATH,
2846
                                    os.path.isfile)
2847
      if alloc_script is None:
2848
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849
                                   " specified" % self.op.default_iallocator,
2850
                                   errors.ECODE_INVAL)
2851

    
2852
  def Exec(self, feedback_fn):
2853
    """Change the parameters of the cluster.
2854

2855
    """
2856
    if self.op.vg_name is not None:
2857
      new_volume = self.op.vg_name
2858
      if not new_volume:
2859
        new_volume = None
2860
      if new_volume != self.cfg.GetVGName():
2861
        self.cfg.SetVGName(new_volume)
2862
      else:
2863
        feedback_fn("Cluster LVM configuration already in desired"
2864
                    " state, not changing")
2865
    if self.op.drbd_helper is not None:
2866
      new_helper = self.op.drbd_helper
2867
      if not new_helper:
2868
        new_helper = None
2869
      if new_helper != self.cfg.GetDRBDHelper():
2870
        self.cfg.SetDRBDHelper(new_helper)
2871
      else:
2872
        feedback_fn("Cluster DRBD helper already in desired state,"
2873
                    " not changing")
2874
    if self.op.hvparams:
2875
      self.cluster.hvparams = self.new_hvparams
2876
    if self.op.os_hvp:
2877
      self.cluster.os_hvp = self.new_os_hvp
2878
    if self.op.enabled_hypervisors is not None:
2879
      self.cluster.hvparams = self.new_hvparams
2880
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881
    if self.op.beparams:
2882
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883
    if self.op.nicparams:
2884
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885
    if self.op.osparams:
2886
      self.cluster.osparams = self.new_osp
2887

    
2888
    if self.op.candidate_pool_size is not None:
2889
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890
      # we need to update the pool size here, otherwise the save will fail
2891
      _AdjustCandidatePool(self, [])
2892

    
2893
    if self.op.maintain_node_health is not None:
2894
      self.cluster.maintain_node_health = self.op.maintain_node_health
2895

    
2896
    if self.op.add_uids is not None:
2897
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2898

    
2899
    if self.op.remove_uids is not None:
2900
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2901

    
2902
    if self.op.uid_pool is not None:
2903
      self.cluster.uid_pool = self.op.uid_pool
2904

    
2905
    if self.op.default_iallocator is not None:
2906
      self.cluster.default_iallocator = self.op.default_iallocator
2907

    
2908
    if self.op.reserved_lvs is not None:
2909
      self.cluster.reserved_lvs = self.op.reserved_lvs
2910

    
2911
    def helper_oss(aname, mods, desc):
2912
      lst = getattr(self.cluster, aname)
2913
      for key, val in mods:
2914
        if key == constants.DDM_ADD:
2915
          if val in lst:
2916
            feedback_fn("OS %s already in %s, ignoring", val, desc)
2917
          else:
2918
            lst.append(val)
2919
        elif key == constants.DDM_REMOVE:
2920
          if val in lst:
2921
            lst.remove(val)
2922
          else:
2923
            feedback_fn("OS %s not found in %s, ignoring", val, desc)
2924
        else:
2925
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
2926
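    # Illustrative example (hypothetical OS names): ``mods`` is a list of
    # (action, os_name) pairs, e.g.
    #   [(constants.DDM_ADD, "lenny-image"), (constants.DDM_REMOVE, "etch-image")]
    # which lets the same helper maintain both OS lists below.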

    
2927
    if self.op.hidden_oss:
2928
      helper_oss("hidden_oss", self.op.hidden_oss,
2929
                 "hidden OS list")
2930

    
2931
    if self.op.blacklisted_oss:
2932
      helper_oss("blacklisted_oss", self.op.blacklisted_oss,
2933
                 "blacklisted OS list")
2934

    
2935
    self.cfg.Update(self.cluster, feedback_fn)
2936

    
2937

    
2938
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2939
  """Distribute additional files which are part of the cluster configuration.
2940

2941
  ConfigWriter takes care of distributing the config and ssconf files, but
2942
  there are more files which should be distributed to all nodes. This function
2943
  makes sure those are copied.
2944

2945
  @param lu: calling logical unit
2946
  @param additional_nodes: list of nodes not in the config to distribute to
2947

2948
  """
2949
  # 1. Gather target nodes
2950
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2951
  dist_nodes = lu.cfg.GetOnlineNodeList()
2952
  if additional_nodes is not None:
2953
    dist_nodes.extend(additional_nodes)
2954
  if myself.name in dist_nodes:
2955
    dist_nodes.remove(myself.name)
2956

    
2957
  # 2. Gather files to distribute
2958
  dist_files = set([constants.ETC_HOSTS,
2959
                    constants.SSH_KNOWN_HOSTS_FILE,
2960
                    constants.RAPI_CERT_FILE,
2961
                    constants.RAPI_USERS_FILE,
2962
                    constants.CONFD_HMAC_KEY,
2963
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2964
                   ])
2965

    
2966
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2967
  for hv_name in enabled_hypervisors:
2968
    hv_class = hypervisor.GetHypervisor(hv_name)
2969
    dist_files.update(hv_class.GetAncillaryFiles())
2970

    
2971
  # 3. Perform the files upload
2972
  for fname in dist_files:
2973
    if os.path.exists(fname):
2974
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2975
      for to_node, to_result in result.items():
2976
        msg = to_result.fail_msg
2977
        if msg:
2978
          msg = ("Copy of file %s to node %s failed: %s" %
2979
                 (fname, to_node, msg))
2980
          lu.proc.LogWarning(msg)
2981

    
2982

    
2983
class LURedistributeConfig(NoHooksLU):
2984
  """Force the redistribution of cluster configuration.
2985

2986
  This is a very simple LU.
2987

2988
  """
2989
  REQ_BGL = False
2990

    
2991
  def ExpandNames(self):
2992
    self.needed_locks = {
2993
      locking.LEVEL_NODE: locking.ALL_SET,
2994
    }
2995
    self.share_locks[locking.LEVEL_NODE] = 1
2996

    
2997
  def Exec(self, feedback_fn):
2998
    """Redistribute the configuration.
2999

3000
    """
3001
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3002
    _RedistributeAncillaryFiles(self)
3003

    
3004

    
3005
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3006
  """Sleep and poll for an instance's disk to sync.
3007

3008
  """
3009
  if not instance.disks or disks is not None and not disks:
3010
    return True
3011

    
3012
  disks = _ExpandCheckDisks(instance, disks)
3013

    
3014
  if not oneshot:
3015
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3016

    
3017
  node = instance.primary_node
3018

    
3019
  for dev in disks:
3020
    lu.cfg.SetDiskID(dev, node)
3021

    
3022
  # TODO: Convert to utils.Retry
3023

    
3024
  retries = 0
3025
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3026
  while True:
3027
    max_time = 0
3028
    done = True
3029
    cumul_degraded = False
3030
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3031
    msg = rstats.fail_msg
3032
    if msg:
3033
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3034
      retries += 1
3035
      if retries >= 10:
3036
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3037
                                 " aborting." % node)
3038
      time.sleep(6)
3039
      continue
3040
    rstats = rstats.payload
3041
    retries = 0
3042
    for i, mstat in enumerate(rstats):
3043
      if mstat is None:
3044
        lu.LogWarning("Can't compute data for node %s/%s",
3045
                      node, disks[i].iv_name)
3046
        continue
3047

    
3048
      cumul_degraded = (cumul_degraded or
3049
                        (mstat.is_degraded and mstat.sync_percent is None))
3050
      if mstat.sync_percent is not None:
3051
        done = False
3052
        if mstat.estimated_time is not None:
3053
          rem_time = ("%s remaining (estimated)" %
3054
                      utils.FormatSeconds(mstat.estimated_time))
3055
          max_time = mstat.estimated_time
3056
        else:
3057
          rem_time = "no time estimate"
3058
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3059
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3060

    
3061
    # if we're done but degraded, let's do a few small retries, to
3062
    # make sure we see a stable and not transient situation; therefore
3063
    # we force restart of the loop
3064
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3065
      logging.info("Degraded disks found, %d retries left", degr_retries)
3066
      degr_retries -= 1
3067
      time.sleep(1)
3068
      continue
3069

    
3070
    if done or oneshot:
3071
      break
3072

    
3073
    time.sleep(min(60, max_time))
3074

    
3075
  if done:
3076
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3077
  return not cumul_degraded
3078
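# Usage sketch (illustrative): callers typically run
#   _WaitForSync(lu, instance)
# after creating or growing disks and treat a False result (degraded mirrors)
# as an error; with oneshot=True only a single status pass is done instead of
# polling until the sync finishes.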

    
3079

    
3080
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3081
  """Check that mirrors are not degraded.
3082

3083
  The ldisk parameter, if True, will change the test from the
3084
  is_degraded attribute (which represents overall non-ok status for
3085
  the device(s)) to the ldisk (representing the local storage status).
3086

3087
  """
3088
  lu.cfg.SetDiskID(dev, node)
3089

    
3090
  result = True
3091

    
3092
  if on_primary or dev.AssembleOnSecondary():
3093
    rstats = lu.rpc.call_blockdev_find(node, dev)
3094
    msg = rstats.fail_msg
3095
    if msg:
3096
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3097
      result = False
3098
    elif not rstats.payload:
3099
      lu.LogWarning("Can't find disk on node %s", node)
3100
      result = False
3101
    else:
3102
      if ldisk:
3103
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3104
      else:
3105
        result = result and not rstats.payload.is_degraded
3106

    
3107
  if dev.children:
3108
    for child in dev.children:
3109
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3110

    
3111
  return result
3112
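# Usage sketch (illustrative, hypothetical call site): a disk replacement
# would typically check only the local storage status, e.g.
#   _CheckDiskConsistency(lu, dev, node_name, False, ldisk=True)
# while the default ldisk=False form checks the overall is_degraded state.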

    
3113

    
3114
class LUDiagnoseOS(NoHooksLU):
3115
  """Logical unit for OS diagnose/query.
3116

3117
  """
3118
  _OP_PARAMS = [
3119
    _POutputFields,
3120
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3121
    ]
3122
  REQ_BGL = False
3123
  _HID = "hidden"
3124
  _BLK = "blacklisted"
3125
  _VLD = "valid"
3126
  _FIELDS_STATIC = utils.FieldSet()
3127
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3128
                                   "parameters", "api_versions", _HID, _BLK)
3129

    
3130
  def CheckArguments(self):
3131
    if self.op.names:
3132
      raise errors.OpPrereqError("Selective OS query not supported",
3133
                                 errors.ECODE_INVAL)
3134

    
3135
    _CheckOutputFields(static=self._FIELDS_STATIC,
3136
                       dynamic=self._FIELDS_DYNAMIC,
3137
                       selected=self.op.output_fields)
3138

    
3139
  def ExpandNames(self):
3140
    # Lock all nodes, in shared mode
3141
    # Temporary removal of locks, should be reverted later
3142
    # TODO: reintroduce locks when they are lighter-weight
3143
    self.needed_locks = {}
3144
    #self.share_locks[locking.LEVEL_NODE] = 1
3145
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3146

    
3147
  @staticmethod
3148
  def _DiagnoseByOS(rlist):
3149
    """Remaps a per-node return list into an a per-os per-node dictionary
3150

3151
    @param rlist: a map with node names as keys and OS objects as values
3152

3153
    @rtype: dict
3154
    @return: a dictionary with osnames as keys and as value another
3155
        map, with nodes as keys and tuples of (path, status, diagnose,
3156
        variants, parameters, api_versions) as values, eg::
3157

3158
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3159
                                     (/srv/..., False, "invalid api")],
3160
                           "node2": [(/srv/..., True, "", [], [])]}
3161
          }
3162

3163
    """
3164
    all_os = {}
3165
    # we build here the list of nodes that didn't fail the RPC (at RPC
3166
    # level), so that nodes with a non-responding node daemon don't
3167
    # make all OSes invalid
3168
    good_nodes = [node_name for node_name in rlist
3169
                  if not rlist[node_name].fail_msg]
3170
    for node_name, nr in rlist.items():
3171
      if nr.fail_msg or not nr.payload:
3172
        continue
3173
      for (name, path, status, diagnose, variants,
3174
           params, api_versions) in nr.payload:
3175
        if name not in all_os:
3176
          # build a list of nodes for this os containing empty lists
3177
          # for each node in node_list
3178
          all_os[name] = {}
3179
          for nname in good_nodes:
3180
            all_os[name][nname] = []
3181
        # convert params from [name, help] to (name, help)
3182
        params = [tuple(v) for v in params]
3183
        all_os[name][node_name].append((path, status, diagnose,
3184
                                        variants, params, api_versions))
3185
    return all_os
3186

    
3187
  def Exec(self, feedback_fn):
3188
    """Compute the list of OSes.
3189

3190
    """
3191
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3192
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3193
    pol = self._DiagnoseByOS(node_data)
3194
    output = []
3195
    cluster = self.cfg.GetClusterInfo()
3196

    
3197
    for os_name in utils.NiceSort(pol.keys()):
3198
      os_data = pol[os_name]
3199
      row = []
3200
      valid = True
3201
      (variants, params, api_versions) = null_state = (set(), set(), set())
3202
      for idx, osl in enumerate(os_data.values()):
3203
        valid = bool(valid and osl and osl[0][1])
3204
        if not valid:
3205
          (variants, params, api_versions) = null_state
3206
          break
3207
        node_variants, node_params, node_api = osl[0][3:6]
3208
        if idx == 0: # first entry
3209
          variants = set(node_variants)
3210
          params = set(node_params)
3211
          api_versions = set(node_api)
3212
        else: # keep consistency
3213
          variants.intersection_update(node_variants)
3214
          params.intersection_update(node_params)
3215
          api_versions.intersection_update(node_api)
3216

    
3217
      is_hid = os_name in cluster.hidden_oss
3218
      is_blk = os_name in cluster.blacklisted_oss
3219
      if ((self._HID not in self.op.output_fields and is_hid) or
3220
          (self._BLK not in self.op.output_fields and is_blk) or
3221
          (self._VLD not in self.op.output_fields and not valid)):
3222
        continue
3223

    
3224
      for field in self.op.output_fields:
3225
        if field == "name":
3226
          val = os_name
3227
        elif field == self._VLD:
3228
          val = valid
3229
        elif field == "node_status":
3230
          # this is just a copy of the dict
3231
          val = {}
3232
          for node_name, nos_list in os_data.items():
3233
            val[node_name] = nos_list
3234
        elif field == "variants":
3235
          val = utils.NiceSort(list(variants))
3236
        elif field == "parameters":
3237
          val = list(params)
3238
        elif field == "api_versions":
3239
          val = list(api_versions)
3240
        elif field == self._HID:
3241
          val = is_hid
3242
        elif field == self._BLK:
3243
          val = is_blk
3244
        else:
3245
          raise errors.ParameterError(field)
3246
        row.append(val)
3247
      output.append(row)
3248
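    # Illustrative example (hypothetical data): with
    #   output_fields = ["name", "valid", "variants"]
    # a single row would look like ["debian-etch", True, ["default"]],
    # i.e. one value per requested field, in the requested order.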

    
3249
    return output
3250

    
3251

    
3252
class LURemoveNode(LogicalUnit):
3253
  """Logical unit for removing a node.
3254

3255
  """
3256
  HPATH = "node-remove"
3257
  HTYPE = constants.HTYPE_NODE
3258
  _OP_PARAMS = [
3259
    _PNodeName,
3260
    ]
3261

    
3262
  def BuildHooksEnv(self):
3263
    """Build hooks env.
3264

3265
    This doesn't run on the target node in the pre phase as a failed
3266
    node would then be impossible to remove.
3267

3268
    """
3269
    env = {
3270
      "OP_TARGET": self.op.node_name,
3271
      "NODE_NAME": self.op.node_name,
3272
      }
3273
    all_nodes = self.cfg.GetNodeList()
3274
    try:
3275
      all_nodes.remove(self.op.node_name)
3276
    except ValueError:
3277
      logging.warning("Node %s which is about to be removed not found"
3278
                      " in the all nodes list", self.op.node_name)
3279
    return env, all_nodes, all_nodes
3280

    
3281
  def CheckPrereq(self):
3282
    """Check prerequisites.
3283

3284
    This checks:
3285
     - the node exists in the configuration
3286
     - it does not have primary or secondary instances
3287
     - it's not the master
3288

3289
    Any errors are signaled by raising errors.OpPrereqError.
3290

3291
    """
3292
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3293
    node = self.cfg.GetNodeInfo(self.op.node_name)
3294
    assert node is not None
3295

    
3296
    instance_list = self.cfg.GetInstanceList()
3297

    
3298
    masternode = self.cfg.GetMasterNode()
3299
    if node.name == masternode:
3300
      raise errors.OpPrereqError("Node is the master node,"
3301
                                 " you need to failover first.",
3302
                                 errors.ECODE_INVAL)
3303

    
3304
    for instance_name in instance_list:
3305
      instance = self.cfg.GetInstanceInfo(instance_name)
3306
      if node.name in instance.all_nodes:
3307
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3308
                                   " please remove first." % instance_name,
3309
                                   errors.ECODE_INVAL)
3310
    self.op.node_name = node.name
3311
    self.node = node
3312

    
3313
  def Exec(self, feedback_fn):
3314
    """Removes the node from the cluster.
3315

3316
    """
3317
    node = self.node
3318
    logging.info("Stopping the node daemon and removing configs from node %s",
3319
                 node.name)
3320

    
3321
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3322

    
3323
    # Promote nodes to master candidate as needed
3324
    _AdjustCandidatePool(self, exceptions=[node.name])
3325
    self.context.RemoveNode(node.name)
3326

    
3327
    # Run post hooks on the node before it's removed
3328
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3329
    try:
3330
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3331
    except:
3332
      # pylint: disable-msg=W0702
3333
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3334

    
3335
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3336
    msg = result.fail_msg
3337
    if msg:
3338
      self.LogWarning("Errors encountered on the remote node while leaving"
3339
                      " the cluster: %s", msg)
3340

    
3341
    # Remove node from our /etc/hosts
3342
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3343
      # FIXME: this should be done via an rpc call to node daemon
3344
      utils.RemoveHostFromEtcHosts(node.name)
3345
      _RedistributeAncillaryFiles(self)
3346

    
3347

    
3348
class LUQueryNodes(NoHooksLU):
3349
  """Logical unit for querying nodes.
3350

3351
  """
3352
  # pylint: disable-msg=W0142
3353
  _OP_PARAMS = [
3354
    _POutputFields,
3355
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3356
    ("use_locking", False, _TBool),
3357
    ]
3358
  REQ_BGL = False
3359

    
3360
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3361
                    "master_candidate", "offline", "drained"]
3362

    
3363
  _FIELDS_DYNAMIC = utils.FieldSet(
3364
    "dtotal", "dfree",
3365
    "mtotal", "mnode", "mfree",
3366
    "bootid",
3367
    "ctotal", "cnodes", "csockets",
3368
    )
3369

    
3370
  _FIELDS_STATIC = utils.FieldSet(*[
3371
    "pinst_cnt", "sinst_cnt",
3372
    "pinst_list", "sinst_list",
3373
    "pip", "sip", "tags",
3374
    "master",
3375
    "role"] + _SIMPLE_FIELDS
3376
    )
3377

    
3378
  def CheckArguments(self):
3379
    _CheckOutputFields(static=self._FIELDS_STATIC,
3380
                       dynamic=self._FIELDS_DYNAMIC,
3381
                       selected=self.op.output_fields)
3382

    
3383
  def ExpandNames(self):
3384
    self.needed_locks = {}
3385
    self.share_locks[locking.LEVEL_NODE] = 1
3386

    
3387
    if self.op.names:
3388
      self.wanted = _GetWantedNodes(self, self.op.names)
3389
    else:
3390
      self.wanted = locking.ALL_SET
3391

    
3392
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3393
    self.do_locking = self.do_node_query and self.op.use_locking
3394
    if self.do_locking:
3395
      # if we don't request only static fields, we need to lock the nodes
3396
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3397

    
3398
  def Exec(self, feedback_fn):
3399
    """Computes the list of nodes and their attributes.
3400

3401
    """
3402
    all_info = self.cfg.GetAllNodesInfo()
3403
    if self.do_locking:
3404
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3405
    elif self.wanted != locking.ALL_SET:
3406
      nodenames = self.wanted
3407
      missing = set(nodenames).difference(all_info.keys())
3408
      if missing:
3409
        raise errors.OpExecError(
3410
          "Some nodes were removed before retrieving their data: %s" % missing)
3411
    else:
3412
      nodenames = all_info.keys()
3413

    
3414
    nodenames = utils.NiceSort(nodenames)
3415
    nodelist = [all_info[name] for name in nodenames]
3416

    
3417
    # begin data gathering
3418

    
3419
    if self.do_node_query:
3420
      live_data = {}
3421
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3422
                                          self.cfg.GetHypervisorType())
3423
      for name in nodenames:
3424
        nodeinfo = node_data[name]
3425
        if not nodeinfo.fail_msg and nodeinfo.payload:
3426
          nodeinfo = nodeinfo.payload
3427
          fn = utils.TryConvert
3428
          live_data[name] = {
3429
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3430
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3431
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3432
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3433
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3434
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3435
            "bootid": nodeinfo.get('bootid', None),
3436
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3437
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3438
            }
3439
        else:
3440
          live_data[name] = {}
3441
    else:
3442
      live_data = dict.fromkeys(nodenames, {})
3443

    
3444
    node_to_primary = dict([(name, set()) for name in nodenames])
3445
    node_to_secondary = dict([(name, set()) for name in nodenames])
3446

    
3447
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3448
                             "sinst_cnt", "sinst_list"))
3449
    if inst_fields & frozenset(self.op.output_fields):
3450
      inst_data = self.cfg.GetAllInstancesInfo()
3451

    
3452
      for inst in inst_data.values():
3453
        if inst.primary_node in node_to_primary:
3454
          node_to_primary[inst.primary_node].add(inst.name)
3455
        for secnode in inst.secondary_nodes:
3456
          if secnode in node_to_secondary:
3457
            node_to_secondary[secnode].add(inst.name)
3458

    
3459
    master_node = self.cfg.GetMasterNode()
3460

    
3461
    # end data gathering
3462

    
3463
    output = []
3464
    for node in nodelist:
3465
      node_output = []
3466
      for field in self.op.output_fields:
3467
        if field in self._SIMPLE_FIELDS:
3468
          val = getattr(node, field)
3469
        elif field == "pinst_list":
3470
          val = list(node_to_primary[node.name])
3471
        elif field == "sinst_list":
3472
          val = list(node_to_secondary[node.name])
3473
        elif field == "pinst_cnt":
3474
          val = len(node_to_primary[node.name])
3475
        elif field == "sinst_cnt":
3476
          val = len(node_to_secondary[node.name])
3477
        elif field == "pip":
3478
          val = node.primary_ip
3479
        elif field == "sip":
3480
          val = node.secondary_ip
3481
        elif field == "tags":
3482
          val = list(node.GetTags())
3483
        elif field == "master":
3484
          val = node.name == master_node
3485
        elif self._FIELDS_DYNAMIC.Matches(field):
3486
          val = live_data[node.name].get(field, None)
3487
        elif field == "role":
3488
          if node.name == master_node:
3489
            val = "M"
3490
          elif node.master_candidate:
3491
            val = "C"
3492
          elif node.drained:
3493
            val = "D"
3494
          elif node.offline:
3495
            val = "O"
3496
          else:
3497
            val = "R"
3498
        else:
3499
          raise errors.ParameterError(field)
3500
        node_output.append(val)
3501
      output.append(node_output)
3502

    
3503
    return output
3504

    
3505

    
3506
class LUQueryNodeVolumes(NoHooksLU):
3507
  """Logical unit for getting volumes on node(s).
3508

3509
  """
3510
  _OP_PARAMS = [
3511
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3512
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3513
    ]
3514
  REQ_BGL = False
3515
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3516
  _FIELDS_STATIC = utils.FieldSet("node")
3517

    
3518
  def CheckArguments(self):
3519
    _CheckOutputFields(static=self._FIELDS_STATIC,
3520
                       dynamic=self._FIELDS_DYNAMIC,
3521
                       selected=self.op.output_fields)
3522

    
3523
  def ExpandNames(self):
3524
    self.needed_locks = {}
3525
    self.share_locks[locking.LEVEL_NODE] = 1
3526
    if not self.op.nodes:
3527
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3528
    else:
3529
      self.needed_locks[locking.LEVEL_NODE] = \
3530
        _GetWantedNodes(self, self.op.nodes)
3531

    
3532
  def Exec(self, feedback_fn):
3533
    """Computes the list of nodes and their attributes.
3534

3535
    """
3536
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3537
    volumes = self.rpc.call_node_volumes(nodenames)
3538

    
3539
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3540
             in self.cfg.GetInstanceList()]
3541

    
3542
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3543

    
3544
    output = []
3545
    for node in nodenames:
3546
      nresult = volumes[node]
3547
      if nresult.offline:
3548
        continue
3549
      msg = nresult.fail_msg
3550
      if msg:
3551
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3552
        continue
3553

    
3554
      node_vols = nresult.payload[:]
3555
      node_vols.sort(key=lambda vol: vol['dev'])
3556

    
3557
      for vol in node_vols:
3558
        node_output = []
3559
        for field in self.op.output_fields:
3560
          if field == "node":
3561
            val = node
3562
          elif field == "phys":
3563
            val = vol['dev']
3564
          elif field == "vg":
3565
            val = vol['vg']
3566
          elif field == "name":
3567
            val = vol['name']
3568
          elif field == "size":
3569
            val = int(float(vol['size']))
3570
          elif field == "instance":
3571
            for inst in ilist:
3572
              if node not in lv_by_node[inst]:
3573
                continue
3574
              if vol['name'] in lv_by_node[inst][node]:
3575
                val = inst.name
3576
                break
3577
            else:
3578
              val = '-'
3579
          else:
3580
            raise errors.ParameterError(field)
3581
          node_output.append(str(val))
3582

    
3583
        output.append(node_output)
3584

    
3585
    return output
3586

    
3587

    
3588
class LUQueryNodeStorage(NoHooksLU):
3589
  """Logical unit for getting information on storage units on node(s).
3590

3591
  """
3592
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3593
  _OP_PARAMS = [
3594
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3595
    ("storage_type", _NoDefault, _CheckStorageType),
3596
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3597
    ("name", None, _TMaybeString),
3598
    ]
3599
  REQ_BGL = False
3600

    
3601
  def CheckArguments(self):
3602
    _CheckOutputFields(static=self._FIELDS_STATIC,
3603
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3604
                       selected=self.op.output_fields)
3605

    
3606
  def ExpandNames(self):
3607
    self.needed_locks = {}
3608
    self.share_locks[locking.LEVEL_NODE] = 1
3609

    
3610
    if self.op.nodes:
3611
      self.needed_locks[locking.LEVEL_NODE] = \
3612
        _GetWantedNodes(self, self.op.nodes)
3613
    else:
3614
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615

    
3616
  def Exec(self, feedback_fn):
3617
    """Computes the list of nodes and their attributes.
3618

3619
    """
3620
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3621

    
3622
    # Always get name to sort by
3623
    if constants.SF_NAME in self.op.output_fields:
3624
      fields = self.op.output_fields[:]
3625
    else:
3626
      fields = [constants.SF_NAME] + self.op.output_fields
3627

    
3628
    # Never ask for node or type as it's only known to the LU
3629
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3630
      while extra in fields:
3631
        fields.remove(extra)
3632

    
3633
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3634
    name_idx = field_idx[constants.SF_NAME]
3635
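    # Illustrative example (hypothetical request): output_fields of
    #   [constants.SF_NODE, constants.SF_FREE]
    # results in fields == [constants.SF_NAME, constants.SF_FREE] for the RPC,
    # since the name is always fetched for sorting while node and type are
    # filled in locally below.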

    
3636
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3637
    data = self.rpc.call_storage_list(self.nodes,
3638
                                      self.op.storage_type, st_args,
3639
                                      self.op.name, fields)
3640

    
3641
    result = []
3642

    
3643
    for node in utils.NiceSort(self.nodes):
3644
      nresult = data[node]
3645
      if nresult.offline:
3646
        continue
3647

    
3648
      msg = nresult.fail_msg
3649
      if msg:
3650
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3651
        continue
3652

    
3653
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3654

    
3655
      for name in utils.NiceSort(rows.keys()):
3656
        row = rows[name]
3657

    
3658
        out = []
3659

    
3660
        for field in self.op.output_fields:
3661
          if field == constants.SF_NODE:
3662
            val = node
3663
          elif field == constants.SF_TYPE:
3664
            val = self.op.storage_type
3665
          elif field in field_idx:
3666
            val = row[field_idx[field]]
3667
          else:
3668
            raise errors.ParameterError(field)
3669

    
3670
          out.append(val)
3671

    
3672
        result.append(out)
3673

    
3674
    return result
3675

    
3676

    
3677
class LUModifyNodeStorage(NoHooksLU):
3678
  """Logical unit for modifying a storage volume on a node.
3679

3680
  """
3681
  _OP_PARAMS = [
3682
    _PNodeName,
3683
    ("storage_type", _NoDefault, _CheckStorageType),
3684
    ("name", _NoDefault, _TNonEmptyString),
3685
    ("changes", _NoDefault, _TDict),
3686
    ]
3687
  REQ_BGL = False
3688

    
3689
  def CheckArguments(self):
3690
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3691

    
3692
    storage_type = self.op.storage_type
3693

    
3694
    try:
3695
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3696
    except KeyError:
3697
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3698
                                 " modified" % storage_type,
3699
                                 errors.ECODE_INVAL)
3700

    
3701
    diff = set(self.op.changes.keys()) - modifiable
3702
    if diff:
3703
      raise errors.OpPrereqError("The following fields can not be modified for"
3704
                                 " storage units of type '%s': %r" %
3705
                                 (storage_type, list(diff)),
3706
                                 errors.ECODE_INVAL)
3707

    
3708
  def ExpandNames(self):
3709
    self.needed_locks = {
3710
      locking.LEVEL_NODE: self.op.node_name,
3711
      }
3712

    
3713
  def Exec(self, feedback_fn):
3714
    """Computes the list of nodes and their attributes.
3715

3716
    """
3717
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3718
    result = self.rpc.call_storage_modify(self.op.node_name,
3719
                                          self.op.storage_type, st_args,
3720
                                          self.op.name, self.op.changes)
3721
    result.Raise("Failed to modify storage unit '%s' on %s" %
3722
                 (self.op.name, self.op.node_name))
3723

    
3724

    
3725
class LUAddNode(LogicalUnit):
3726
  """Logical unit for adding node to the cluster.
3727

3728
  """
3729
  HPATH = "node-add"
3730
  HTYPE = constants.HTYPE_NODE
3731
  _OP_PARAMS = [
3732
    _PNodeName,
3733
    ("primary_ip", None, _NoType),
3734
    ("secondary_ip", None, _TMaybeString),
3735
    ("readd", False, _TBool),
3736
    ]
3737

    
3738
  def CheckArguments(self):
3739
    # validate/normalize the node name
3740
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3741

    
3742
  def BuildHooksEnv(self):
3743
    """Build hooks env.
3744

3745
    This will run on all nodes before, and on all nodes + the new node after.
3746

3747
    """
3748
    env = {
3749
      "OP_TARGET": self.op.node_name,
3750
      "NODE_NAME": self.op.node_name,
3751
      "NODE_PIP": self.op.primary_ip,
3752
      "NODE_SIP": self.op.secondary_ip,
3753
      }
3754
    nodes_0 = self.cfg.GetNodeList()
3755
    nodes_1 = nodes_0 + [self.op.node_name, ]
3756
    return env, nodes_0, nodes_1
3757

    
3758
  def CheckPrereq(self):
3759
    """Check prerequisites.
3760

3761
    This checks:
3762
     - the new node is not already in the config
3763
     - it is resolvable
3764
     - its parameters (single/dual homed) matches the cluster
3765

3766
    Any errors are signaled by raising errors.OpPrereqError.
3767

3768
    """
3769
    node_name = self.op.node_name
3770
    cfg = self.cfg
3771

    
3772
    dns_data = netutils.GetHostInfo(node_name)
3773

    
3774
    node = dns_data.name
3775
    primary_ip = self.op.primary_ip = dns_data.ip
3776
    if self.op.secondary_ip is None:
3777
      self.op.secondary_ip = primary_ip
3778
    if not netutils.IsValidIP4(self.op.secondary_ip):
3779
      raise errors.OpPrereqError("Invalid secondary IP given",
3780
                                 errors.ECODE_INVAL)
3781
    secondary_ip = self.op.secondary_ip
3782

    
3783
    node_list = cfg.GetNodeList()
3784
    if not self.op.readd and node in node_list:
3785
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3786
                                 node, errors.ECODE_EXISTS)
3787
    elif self.op.readd and node not in node_list:
3788
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3789
                                 errors.ECODE_NOENT)
3790

    
3791
    self.changed_primary_ip = False
3792

    
3793
    for existing_node_name in node_list:
3794
      existing_node = cfg.GetNodeInfo(existing_node_name)
3795

    
3796
      if self.op.readd and node == existing_node_name:
3797
        if existing_node.secondary_ip != secondary_ip:
3798
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3799
                                     " address configuration as before",
3800
                                     errors.ECODE_INVAL)
3801
        if existing_node.primary_ip != primary_ip:
3802
          self.changed_primary_ip = True
3803

    
3804
        continue
3805

    
3806
      if (existing_node.primary_ip == primary_ip or
3807
          existing_node.secondary_ip == primary_ip or
3808
          existing_node.primary_ip == secondary_ip or
3809
          existing_node.secondary_ip == secondary_ip):
3810
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3811
                                   " existing node %s" % existing_node.name,
3812
                                   errors.ECODE_NOTUNIQUE)
3813

    
3814
    # check that the type of the node (single versus dual homed) is the
3815
    # same as for the master
3816
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3817
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3818
    newbie_singlehomed = secondary_ip == primary_ip
3819
    if master_singlehomed != newbie_singlehomed:
3820
      if master_singlehomed:
3821
        raise errors.OpPrereqError("The master has no private ip but the"
3822
                                   " new node has one",
3823
                                   errors.ECODE_INVAL)
3824
      else:
3825
        raise errors.OpPrereqError("The master has a private ip but the"
3826
                                   " new node doesn't have one",
3827
                                   errors.ECODE_INVAL)
3828

    
3829
    # checks reachability
3830
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3831
      raise errors.OpPrereqError("Node not reachable by ping",
3832
                                 errors.ECODE_ENVIRON)
3833

    
3834
    if not newbie_singlehomed:
3835
      # check reachability from my secondary ip to newbie's secondary ip
3836
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3837
                           source=myself.secondary_ip):
3838
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3839
                                   " based ping to noded port",
3840
                                   errors.ECODE_ENVIRON)
3841

    
3842
    if self.op.readd:
3843
      exceptions = [node]
3844
    else:
3845
      exceptions = []
3846

    
3847
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3848

    
3849
    if self.op.readd:
3850
      self.new_node = self.cfg.GetNodeInfo(node)
3851
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3852
    else:
3853
      self.new_node = objects.Node(name=node,
3854
                                   primary_ip=primary_ip,
3855
                                   secondary_ip=secondary_ip,
3856
                                   master_candidate=self.master_candidate,
3857
                                   offline=False, drained=False)
3858

    
3859
  def Exec(self, feedback_fn):
3860
    """Adds the new node to the cluster.
3861

3862
    """
3863
    new_node = self.new_node
3864
    node = new_node.name
3865

    
3866
    # for re-adds, reset the offline/drained/master-candidate flags;
3867
    # we need to reset here, otherwise offline would prevent RPC calls
3868
    # later in the procedure; this also means that if the re-add
3869
    # fails, we are left with a non-offlined, broken node
3870
    if self.op.readd:
3871
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3872
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3873
      # if we demote the node, we do cleanup later in the procedure
3874
      new_node.master_candidate = self.master_candidate
3875
      if self.changed_primary_ip:
3876
        new_node.primary_ip = self.op.primary_ip
3877

    
3878
    # notify the user about any possible mc promotion
3879
    if new_node.master_candidate:
3880
      self.LogInfo("Node will be a master candidate")
3881

    
3882
    # check connectivity
3883
    result = self.rpc.call_version([node])[node]
3884
    result.Raise("Can't get version information from node %s" % node)
3885
    if constants.PROTOCOL_VERSION == result.payload:
3886
      logging.info("Communication to node %s fine, sw version %s match",
3887
                   node, result.payload)
3888
    else:
3889
      raise errors.OpExecError("Version mismatch master version %s,"
3890
                               " node version %s" %
3891
                               (constants.PROTOCOL_VERSION, result.payload))
3892

    
3893
    # setup ssh on node
3894
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3895
      logging.info("Copy ssh key to node %s", node)
3896
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3897
      keyarray = []
3898
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3899
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3900
                  priv_key, pub_key]
3901

    
3902
      for i in keyfiles:
3903
        keyarray.append(utils.ReadFile(i))
3904

    
3905
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3906
                                      keyarray[2], keyarray[3], keyarray[4],
3907
                                      keyarray[5])
3908
      result.Raise("Cannot transfer ssh keys to the new node")
3909

    
3910
    # Add node to our /etc/hosts, and add key to known_hosts
3911
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3912
      # FIXME: this should be done via an rpc call to node daemon
3913
      utils.AddHostToEtcHosts(new_node.name)
3914

    
3915
    if new_node.secondary_ip != new_node.primary_ip:
3916
      result = self.rpc.call_node_has_ip_address(new_node.name,
3917
                                                 new_node.secondary_ip)
3918
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3919
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3920
      if not result.payload:
3921
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3922
                                 " you gave (%s). Please fix and re-run this"
3923
                                 " command." % new_node.secondary_ip)
3924

    
3925
    node_verify_list = [self.cfg.GetMasterNode()]
3926
    node_verify_param = {
3927
      constants.NV_NODELIST: [node],
3928
      # TODO: do a node-net-test as well?
3929
    }
3930

    
3931
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3932
                                       self.cfg.GetClusterName())
3933
    for verifier in node_verify_list:
3934
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3935
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3936
      if nl_payload:
3937
        for failed in nl_payload:
3938
          feedback_fn("ssh/hostname verification failed"
3939
                      " (checking from %s): %s" %
3940
                      (verifier, nl_payload[failed]))
3941
        raise errors.OpExecError("ssh/hostname verification failed.")
3942

    
3943
    if self.op.readd:
3944
      _RedistributeAncillaryFiles(self)
3945
      self.context.ReaddNode(new_node)
3946
      # make sure we redistribute the config
3947
      self.cfg.Update(new_node, feedback_fn)
3948
      # and make sure the new node will not have old files around
3949
      if not new_node.master_candidate:
3950
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3951
        msg = result.fail_msg
3952
        if msg:
3953
          self.LogWarning("Node failed to demote itself from master"
3954
                          " candidate status: %s" % msg)
3955
    else:
3956
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3957
      self.context.AddNode(new_node, self.proc.GetECId())
3958

    
3959

    
3960
class LUSetNodeParams(LogicalUnit):
3961
  """Modifies the parameters of a node.
3962

3963
  """
3964
  HPATH = "node-modify"
3965
  HTYPE = constants.HTYPE_NODE
3966
  _OP_PARAMS = [
3967
    _PNodeName,
3968
    ("master_candidate", None, _TMaybeBool),
3969
    ("offline", None, _TMaybeBool),
3970
    ("drained", None, _TMaybeBool),
3971
    ("auto_promote", False, _TBool),
3972
    _PForce,
3973
    ]
3974
  REQ_BGL = False
3975

    
3976
  def CheckArguments(self):
3977
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3978
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3979
    if all_mods.count(None) == 3:
3980
      raise errors.OpPrereqError("Please pass at least one modification",
3981
                                 errors.ECODE_INVAL)
3982
    if all_mods.count(True) > 1:
3983
      raise errors.OpPrereqError("Can't set the node into more than one"
3984
                                 " state at the same time",
3985
                                 errors.ECODE_INVAL)
3986

    
3987
    # Boolean value that tells us whether we're offlining or draining the node
3988
    self.offline_or_drain = (self.op.offline == True or
3989
                             self.op.drained == True)
3990
    self.deoffline_or_drain = (self.op.offline == False or
3991
                               self.op.drained == False)
3992
    self.might_demote = (self.op.master_candidate == False or
3993
                         self.offline_or_drain)
3994

    
3995
    self.lock_all = self.op.auto_promote and self.might_demote
3996
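    # Illustrative example (hypothetical opcode): offline=True alone makes
    # offline_or_drain and might_demote True, so together with
    # auto_promote=True we lock all nodes and may promote others to keep
    # enough master candidates.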

    
3997

    
3998
  def ExpandNames(self):
3999
    if self.lock_all:
4000
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4001
    else:
4002
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4003

    
4004
  def BuildHooksEnv(self):
4005
    """Build hooks env.
4006

4007
    This runs on the master node.
4008

4009
    """
4010
    env = {
4011
      "OP_TARGET": self.op.node_name,
4012
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4013
      "OFFLINE": str(self.op.offline),
4014
      "DRAINED": str(self.op.drained),
4015
      }
4016
    nl = [self.cfg.GetMasterNode(),
4017
          self.op.node_name]
4018
    return env, nl, nl
4019

    
4020
  def CheckPrereq(self):
4021
    """Check prerequisites.
4022

4023
    This only checks the instance list against the existing names.
4024

4025
    """
4026
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4027

    
4028
    if (self.op.master_candidate is not None or
4029
        self.op.drained is not None or
4030
        self.op.offline is not None):
4031
      # we can't change the master's node flags
4032
      if self.op.node_name == self.cfg.GetMasterNode():
4033
        raise errors.OpPrereqError("The master role can be changed"
4034
                                   " only via master-failover",
4035
                                   errors.ECODE_INVAL)
4036

    
4037

    
4038
    if node.master_candidate and self.might_demote and not self.lock_all:
4039
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4040
      # check if after removing the current node, we're missing master
4041
      # candidates
4042
      (mc_remaining, mc_should, _) = \
4043
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4044
      if mc_remaining < mc_should:
4045
        raise errors.OpPrereqError("Not enough master candidates, please"
4046
                                   " pass auto_promote to allow promotion",
4047
                                   errors.ECODE_INVAL)
4048

    
4049
    if (self.op.master_candidate == True and
4050
        ((node.offline and not self.op.offline == False) or
4051
         (node.drained and not self.op.drained == False))):
4052
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4053
                                 " to master_candidate" % node.name,
4054
                                 errors.ECODE_INVAL)
4055

    
4056
    # If we're being deofflined/drained, we'll MC ourself if needed
4057
    if (self.deoffline_or_drain and not self.offline_or_drain and not
4058
        self.op.master_candidate == True and not node.master_candidate):
4059
      self.op.master_candidate = _DecideSelfPromotion(self)
4060
      if self.op.master_candidate:
4061
        self.LogInfo("Autopromoting node to master candidate")
4062

    
4063
    return
4064

    
4065
  def Exec(self, feedback_fn):
4066
    """Modifies a node.
4067

4068
    """
4069
    node = self.node
4070

    
4071
    result = []
4072
    changed_mc = False
4073

    
4074
    if self.op.offline is not None:
4075
      node.offline = self.op.offline
4076
      result.append(("offline", str(self.op.offline)))
4077
      if self.op.offline == True:
4078
        if node.master_candidate:
4079
          node.master_candidate = False
4080
          changed_mc = True
4081
          result.append(("master_candidate", "auto-demotion due to offline"))
4082
        if node.drained:
4083
          node.drained = False
4084
          result.append(("drained", "clear drained status due to offline"))
4085

    
4086
    if self.op.master_candidate is not None:
4087
      node.master_candidate = self.op.master_candidate
4088
      changed_mc = True
4089
      result.append(("master_candidate", str(self.op.master_candidate)))
4090
      if self.op.master_candidate == False:
4091
        rrc = self.rpc.call_node_demote_from_mc(node.name)
4092
        msg = rrc.fail_msg
4093
        if msg:
4094
          self.LogWarning("Node failed to demote itself: %s" % msg)
4095

    
4096
    if self.op.drained is not None:
4097
      node.drained = self.op.drained
4098
      result.append(("drained", str(self.op.drained)))
4099
      if self.op.drained == True:
4100
        if node.master_candidate:
4101
          node.master_candidate = False
4102
          changed_mc = True
4103
          result.append(("master_candidate", "auto-demotion due to drain"))
4104
          rrc = self.rpc.call_node_demote_from_mc(node.name)
4105
          msg = rrc.fail_msg
4106
          if msg:
4107
            self.LogWarning("Node failed to demote itself: %s" % msg)
4108
        if node.offline:
4109
          node.offline = False
4110
          result.append(("offline", "clear offline status due to drain"))
4111

    
4112
    # we locked all nodes, we adjust the CP before updating this node
4113
    if self.lock_all:
4114
      _AdjustCandidatePool(self, [node.name])
4115

    
4116
    # this will trigger configuration file update, if needed
4117
    self.cfg.Update(node, feedback_fn)
4118

    
4119
    # this will trigger job queue propagation or cleanup
4120
    if changed_mc:
4121
      self.context.ReaddNode(node)
4122

    
4123
    return result
4124

    
4125

    
4126
class LUPowercycleNode(NoHooksLU):
4127
  """Powercycles a node.
4128

4129
  """
4130
  _OP_PARAMS = [
4131
    _PNodeName,
4132
    _PForce,
4133
    ]
4134
  REQ_BGL = False
4135

    
4136
  def CheckArguments(self):
4137
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4138
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4139
      raise errors.OpPrereqError("The node is the master and the force"
4140
                                 " parameter was not set",
4141
                                 errors.ECODE_INVAL)
4142

    
4143
  def ExpandNames(self):
4144
    """Locking for PowercycleNode.
4145

4146
    This is a last-resort option and shouldn't block on other
4147
    jobs. Therefore, we grab no locks.
4148

4149
    """
4150
    self.needed_locks = {}
4151

    
4152
  def Exec(self, feedback_fn):
4153
    """Reboots a node.
4154

4155
    """
4156
    result = self.rpc.call_node_powercycle(self.op.node_name,
4157
                                           self.cfg.GetHypervisorType())
4158
    result.Raise("Failed to schedule the reboot")
4159
    return result.payload
4160

    
4161

    
4162
class LUQueryClusterInfo(NoHooksLU):
4163
  """Query cluster configuration.
4164

4165
  """
4166
  REQ_BGL = False
4167

    
4168
  def ExpandNames(self):
4169
    self.needed_locks = {}
4170

    
4171
  def Exec(self, feedback_fn):
4172
    """Return cluster config.
4173

4174
    """
4175
    cluster = self.cfg.GetClusterInfo()
4176
    os_hvp = {}
4177

    
4178
    # Filter just for enabled hypervisors
4179
    for os_name, hv_dict in cluster.os_hvp.items():
4180
      os_hvp[os_name] = {}
4181
      for hv_name, hv_params in hv_dict.items():
4182
        if hv_name in cluster.enabled_hypervisors:
4183
          os_hvp[os_name][hv_name] = hv_params
4184

    
4185
    result = {
4186
      "software_version": constants.RELEASE_VERSION,
4187
      "protocol_version": constants.PROTOCOL_VERSION,
4188
      "config_version": constants.CONFIG_VERSION,
4189
      "os_api_version": max(constants.OS_API_VERSIONS),
4190
      "export_version": constants.EXPORT_VERSION,
4191
      "architecture": (platform.architecture()[0], platform.machine()),
4192
      "name": cluster.cluster_name,
4193
      "master": cluster.master_node,
4194
      "default_hypervisor": cluster.enabled_hypervisors[0],
4195
      "enabled_hypervisors": cluster.enabled_hypervisors,
4196
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4197
                        for hypervisor_name in cluster.enabled_hypervisors]),
4198
      "os_hvp": os_hvp,
4199
      "beparams": cluster.beparams,
4200
      "osparams": cluster.osparams,
4201
      "nicparams": cluster.nicparams,
4202
      "candidate_pool_size": cluster.candidate_pool_size,
4203
      "master_netdev": cluster.master_netdev,
4204
      "volume_group_name": cluster.volume_group_name,
4205
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4206
      "file_storage_dir": cluster.file_storage_dir,
4207
      "maintain_node_health": cluster.maintain_node_health,
4208
      "ctime": cluster.ctime,
4209
      "mtime": cluster.mtime,
4210
      "uuid": cluster.uuid,
4211
      "tags": list(cluster.GetTags()),
4212
      "uid_pool": cluster.uid_pool,
4213
      "default_iallocator": cluster.default_iallocator,
4214
      "reserved_lvs": cluster.reserved_lvs,
4215
      }
4216

    
4217
    return result
4218

    
4219

    
4220
class LUQueryConfigValues(NoHooksLU):
4221
  """Return configuration values.
4222

4223
  """
4224
  _OP_PARAMS = [_POutputFields]
4225
  REQ_BGL = False
4226
  _FIELDS_DYNAMIC = utils.FieldSet()
4227
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4228
                                  "watcher_pause")
4229

    
4230
  def CheckArguments(self):
4231
    _CheckOutputFields(static=self._FIELDS_STATIC,
4232
                       dynamic=self._FIELDS_DYNAMIC,
4233
                       selected=self.op.output_fields)
4234

    
4235
  def ExpandNames(self):
4236
    self.needed_locks = {}
4237

    
4238
  def Exec(self, feedback_fn):
4239
    """Dump a representation of the cluster config to the standard output.
4240

4241
    """
4242
    values = []
4243
    for field in self.op.output_fields:
4244
      if field == "cluster_name":
4245
        entry = self.cfg.GetClusterName()
4246
      elif field == "master_node":
4247
        entry = self.cfg.GetMasterNode()
4248
      elif field == "drain_flag":
4249
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4250
      elif field == "watcher_pause":
4251
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4252
      else:
4253
        raise errors.ParameterError(field)
4254
      values.append(entry)
4255
    return values
4256

    
4257

    
4258
class LUActivateInstanceDisks(NoHooksLU):
4259
  """Bring up an instance's disks.
4260

4261
  """
4262
  _OP_PARAMS = [
4263
    _PInstanceName,
4264
    ("ignore_size", False, _TBool),
4265
    ]
4266
  REQ_BGL = False
4267

    
4268
  def ExpandNames(self):
4269
    self._ExpandAndLockInstance()
4270
    self.needed_locks[locking.LEVEL_NODE] = []
4271
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4272

    
4273
  def DeclareLocks(self, level):
4274
    if level == locking.LEVEL_NODE:
4275
      self._LockInstancesNodes()
4276

    
4277
  def CheckPrereq(self):
4278
    """Check prerequisites.
4279

4280
    This checks that the instance is in the cluster.
4281

4282
    """
4283
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4284
    assert self.instance is not None, \
4285
      "Cannot retrieve locked instance %s" % self.op.instance_name
4286
    _CheckNodeOnline(self, self.instance.primary_node)
4287

    
4288
  def Exec(self, feedback_fn):
4289
    """Activate the disks.
4290

4291
    """
4292
    disks_ok, disks_info = \
4293
              _AssembleInstanceDisks(self, self.instance,
4294
                                     ignore_size=self.op.ignore_size)
4295
    if not disks_ok:
4296
      raise errors.OpExecError("Cannot activate block devices")
4297

    
4298
    return disks_info
4299

    
4300

    
4301
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a (disks_ok, device_info) tuple; disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


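# Illustrative sketch (not part of the upstream module): callers typically
# unpack the _AssembleInstanceDisks result as below; _StartInstanceDisks,
# defined next, is one such consumer, and LUActivateInstanceDisks above
# returns device_info directly to the caller.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     logging.debug("%s: %s assembled as %s", node, iv_name, dev_path)
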
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before
  calling _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  and do not cause the function to report failure.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


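# Illustrative sketch (not part of the upstream module): shutting down only a
# subset of an instance's disks while tolerating errors on the primary node;
# "lu" and "instance" stand for objects the caller already holds.
#
#   if not _ShutdownInstanceDisks(lu, instance, disks=instance.disks[:1],
#                                 ignore_primary=True):
#     lu.LogWarning("Some block devices could not be shut down")
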
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


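# Illustrative sketch (not part of the upstream module): a typical call, as
# made by LUStartupInstance.CheckPrereq further below; "self", "instance" and
# "bep" are the caller's logical unit, instance object and filled beparams.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
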
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, _TBool),
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", _NoDefault, _TNonEmptyString),
    ("ip_check", False, _TBool),
    ("name_check", True, _TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
      new_name = hostinfo.name
      if (self.op.ip_check and
          netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostinfo.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, _TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")


  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, _TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, _TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self._migrater.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", _NoDefault, _TNonEmptyString),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
5805
  """Migrate all instances from a node.
5806

5807
  """
5808
  HPATH = "node-migrate"
5809
  HTYPE = constants.HTYPE_NODE
5810
  _OP_PARAMS = [
5811
    _PNodeName,
5812
    _PMigrationMode,
5813
    _PMigrationLive,
5814
    ]
5815
  REQ_BGL = False
5816

    
5817
  def ExpandNames(self):
5818
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5819

    
5820
    self.needed_locks = {
5821
      locking.LEVEL_NODE: [self.op.node_name],
5822
      }
5823

    
5824
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5825

    
5826
    # Create tasklets for migrating instances for all instances on this node
5827
    names = []
5828
    tasklets = []
5829

    
5830
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5831
      logging.debug("Migrating instance %s", inst.name)
5832
      names.append(inst.name)
5833

    
5834
      tasklets.append(TLMigrateInstance(self, inst.name, False))
5835

    
5836
    self.tasklets = tasklets
5837

    
5838
    # Declare instance locks
5839
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5840

    
5841
  def DeclareLocks(self, level):
5842
    if level == locking.LEVEL_NODE:
5843
      self._LockInstancesNodes()
5844

    
5845
  def BuildHooksEnv(self):
5846
    """Build hooks env.
5847

5848
    This runs on the master, the primary and all the secondaries.
5849

5850
    """
5851
    env = {
5852
      "NODE_NAME": self.op.node_name,
5853
      }
5854

    
5855
    nl = [self.cfg.GetMasterNode()]
5856

    
5857
    return (env, nl, nl)
5858

    
5859

    
5860
class TLMigrateInstance(Tasklet):
5861
  """Tasklet class for instance migration.
5862

5863
  @type live: boolean
5864
  @ivar live: whether the migration will be done live or non-live;
5865
      this variable is initalized only after CheckPrereq has run
5866

5867
  """
5868
  def __init__(self, lu, instance_name, cleanup):
5869
    """Initializes this class.
5870

5871
    """
5872
    Tasklet.__init__(self, lu)
5873

    
5874
    # Parameters
5875
    self.instance_name = instance_name
5876
    self.cleanup = cleanup
5877
    self.live = False # will be overridden later
5878

    
5879
  def CheckPrereq(self):
5880
    """Check prerequisites.
5881

5882
    This checks that the instance is in the cluster.
5883

5884
    """
5885
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5886
    instance = self.cfg.GetInstanceInfo(instance_name)
5887
    assert instance is not None
5888

    
5889
    if instance.disk_template != constants.DT_DRBD8:
5890
      raise errors.OpPrereqError("Instance's disk layout is not"
5891
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5892

    
5893
    secondary_nodes = instance.secondary_nodes
5894
    if not secondary_nodes:
5895
      raise errors.ConfigurationError("No secondary node but using"
5896
                                      " drbd8 disk template")
5897

    
5898
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5899

    
5900
    target_node = secondary_nodes[0]
5901
    # check memory requirements on the secondary node
5902
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5903
                         instance.name, i_be[constants.BE_MEMORY],
5904
                         instance.hypervisor)
5905

    
5906
    # check bridge existance
5907
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5908

    
5909
    if not self.cleanup:
5910
      _CheckNodeNotDrained(self.lu, target_node)
5911
      result = self.rpc.call_instance_migratable(instance.primary_node,
5912
                                                 instance)
5913
      result.Raise("Can't migrate, please use failover",
5914
                   prereq=True, ecode=errors.ECODE_STATE)
5915

    
5916
    self.instance = instance
5917

    
5918
    if self.lu.op.live is not None and self.lu.op.mode is not None:
5919
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5920
                                 " parameters are accepted",
5921
                                 errors.ECODE_INVAL)
5922
    if self.lu.op.live is not None:
5923
      if self.lu.op.live:
5924
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
5925
      else:
5926
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5927
      # reset the 'live' parameter to None so that repeated
5928
      # invocations of CheckPrereq do not raise an exception
5929
      self.lu.op.live = None
5930
    elif self.lu.op.mode is None:
5931
      # read the default value from the hypervisor
5932
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5933
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5934

    
5935
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5936

    
5937
  def _WaitUntilSync(self):
5938
    """Poll with custom rpc for disk sync.
5939

5940
    This uses our own step-based rpc call.
5941

5942
    """
5943
    self.feedback_fn("* wait until resync is done")
5944
    all_done = False
5945
    while not all_done:
5946
      all_done = True
5947
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5948
                                            self.nodes_ip,
5949
                                            self.instance.disks)
5950
      min_percent = 100
5951
      for node, nres in result.items():
5952
        nres.Raise("Cannot resync disks on node %s" % node)
5953
        node_done, node_percent = nres.payload
5954
        all_done = all_done and node_done
5955
        if node_percent is not None:
5956
          min_percent = min(min_percent, node_percent)
5957
      if not all_done:
5958
        if min_percent < 100:
5959
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5960
        time.sleep(2)
5961

    
5962
  def _EnsureSecondary(self, node):
5963
    """Demote a node to secondary.
5964

5965
    """
5966
    self.feedback_fn("* switching node %s to secondary mode" % node)
5967

    
5968
    for dev in self.instance.disks:
5969
      self.cfg.SetDiskID(dev, node)
5970

    
5971
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5972
                                          self.instance.disks)
5973
    result.Raise("Cannot change disk to secondary on node %s" % node)
5974

    
5975
  def _GoStandalone(self):
5976
    """Disconnect from the network.
5977

5978
    """
5979
    self.feedback_fn("* changing into standalone mode")
5980
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5981
                                               self.instance.disks)
5982
    for node, nres in result.items():
5983
      nres.Raise("Cannot disconnect disks node %s" % node)
5984

    
5985
  def _GoReconnect(self, multimaster):
5986
    """Reconnect to the network.
5987

5988
    """
5989
    if multimaster:
5990
      msg = "dual-master"
5991
    else:
5992
      msg = "single-master"
5993
    self.feedback_fn("* changing disks into %s mode" % msg)
5994
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5995
                                           self.instance.disks,
5996
                                           self.instance.name, multimaster)
5997
    for node, nres in result.items():
5998
      nres.Raise("Cannot change disks config on node %s" % node)
5999

    
6000
  def _ExecCleanup(self):
6001
    """Try to cleanup after a failed migration.
6002

6003
    The cleanup is done by:
6004
      - check that the instance is running only on one node
6005
        (and update the config if needed)
6006
      - change disks on its secondary node to secondary
6007
      - wait until disks are fully synchronized
6008
      - disconnect from the network
6009
      - change disks into single-master mode
6010
      - wait again until disks are fully synchronized
6011

6012
    """
6013
    instance = self.instance
6014
    target_node = self.target_node
6015
    source_node = self.source_node
6016

    
6017
    # check running on only one node
6018
    self.feedback_fn("* checking where the instance actually runs"
6019
                     " (if this hangs, the hypervisor might be in"
6020
                     " a bad state)")
6021
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6022
    for node, result in ins_l.items():
6023
      result.Raise("Can't contact node %s" % node)
6024

    
6025
    runningon_source = instance.name in ins_l[source_node].payload
6026
    runningon_target = instance.name in ins_l[target_node].payload
6027

    
6028
    if runningon_source and runningon_target:
6029
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6030
                               " or the hypervisor is confused. You will have"
6031
                               " to ensure manually that it runs only on one"
6032
                               " and restart this operation.")
6033

    
6034
    if not (runningon_source or runningon_target):
6035
      raise errors.OpExecError("Instance does not seem to be running at all."
6036
                               " In this case, it's safer to repair by"
6037
                               " running 'gnt-instance stop' to ensure disk"
6038
                               " shutdown, and then restarting it.")
6039

    
6040
    if runningon_target:
6041
      # the migration has actually succeeded, we need to update the config
6042
      self.feedback_fn("* instance running on secondary node (%s),"
6043
                       " updating config" % target_node)
6044
      instance.primary_node = target_node
6045
      self.cfg.Update(instance, self.feedback_fn)
6046
      demoted_node = source_node
6047
    else:
6048
      self.feedback_fn("* instance confirmed to be running on its"
6049
                       " primary node (%s)" % source_node)
6050
      demoted_node = target_node
6051

    
6052
    self._EnsureSecondary(demoted_node)
6053
    try:
6054
      self._WaitUntilSync()
6055
    except errors.OpExecError:
6056
      # we ignore here errors, since if the device is standalone, it
6057
      # won't be able to sync
6058
      pass
6059
    self._GoStandalone()
6060
    self._GoReconnect(False)
6061
    self._WaitUntilSync()
6062

    
6063
    self.feedback_fn("* done")
6064

    
6065
  def _RevertDiskStatus(self):
6066
    """Try to revert the disk status after a failed migration.
6067

6068
    """
6069
    target_node = self.target_node
6070
    try:
6071
      self._EnsureSecondary(target_node)
6072
      self._GoStandalone()
6073
      self._GoReconnect(False)
6074
      self._WaitUntilSync()
6075
    except errors.OpExecError, err:
6076
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6077
                         " drives: error '%s'\n"
6078
                         "Please look and recover the instance status" %
6079
                         str(err))
6080

    
6081
  def _AbortMigration(self):
6082
    """Call the hypervisor code to abort a started migration.
6083

6084
    """
6085
    instance = self.instance
6086
    target_node = self.target_node
6087
    migration_info = self.migration_info
6088

    
6089
    abort_result = self.rpc.call_finalize_migration(target_node,
6090
                                                    instance,
6091
                                                    migration_info,
6092
                                                    False)
6093
    abort_msg = abort_result.fail_msg
6094
    if abort_msg:
6095
      logging.error("Aborting migration failed on target node %s: %s",
6096
                    target_node, abort_msg)
6097
      # Don't raise an exception here, as we stil have to try to revert the
6098
      # disk status, even if this step failed.
6099

    
6100
  def _ExecMigration(self):
6101
    """Migrate an instance.
6102

6103
    The migrate is done by:
6104
      - change the disks into dual-master mode
6105
      - wait until disks are fully synchronized again
6106
      - migrate the instance
6107
      - change disks on the new secondary node (the old primary) to secondary
6108
      - wait until disks are fully synchronized
6109
      - change disks into single-master mode
6110

6111
    """
6112
    instance = self.instance
6113
    target_node = self.target_node
6114
    source_node = self.source_node
6115

    
6116
    self.feedback_fn("* checking disk consistency between source and target")
6117
    for dev in instance.disks:
6118
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6119
        raise errors.OpExecError("Disk %s is degraded or not fully"
6120
                                 " synchronized on target node,"
6121
                                 " aborting migrate." % dev.iv_name)
6122

    
6123
    # First get the migration information from the remote node
6124
    result = self.rpc.call_migration_info(source_node, instance)
6125
    msg = result.fail_msg
6126
    if msg:
6127
      log_err = ("Failed fetching source migration information from %s: %s" %
6128
                 (source_node, msg))
6129
      logging.error(log_err)
6130
      raise errors.OpExecError(log_err)
6131

    
6132
    self.migration_info = migration_info = result.payload
6133

    
6134
    # Then switch the disks to master/master mode
6135
    self._EnsureSecondary(target_node)
6136
    self._GoStandalone()
6137
    self._GoReconnect(True)
6138
    self._WaitUntilSync()
6139

    
6140
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6141
    result = self.rpc.call_accept_instance(target_node,
6142
                                           instance,
6143
                                           migration_info,
6144
                                           self.nodes_ip[target_node])
6145

    
6146
    msg = result.fail_msg
6147
    if msg:
6148
      logging.error("Instance pre-migration failed, trying to revert"
6149
                    " disk status: %s", msg)
6150
      self.feedback_fn("Pre-migration failed, aborting")
6151
      self._AbortMigration()
6152
      self._RevertDiskStatus()
6153
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6154
                               (instance.name, msg))
6155

    
6156
    self.feedback_fn("* migrating instance to %s" % target_node)
6157
    time.sleep(10)
6158
    result = self.rpc.call_instance_migrate(source_node, instance,
6159
                                            self.nodes_ip[target_node],
6160
                                            self.live)
6161
    msg = result.fail_msg
6162
    if msg:
6163
      logging.error("Instance migration failed, trying to revert"
6164
                    " disk status: %s", msg)
6165
      self.feedback_fn("Migration failed, aborting")
6166
      self._AbortMigration()
6167
      self._RevertDiskStatus()
6168
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6169
                               (instance.name, msg))
6170
    time.sleep(10)
6171

    
6172
    instance.primary_node = target_node
6173
    # distribute new instance config to the other nodes
6174
    self.cfg.Update(instance, self.feedback_fn)
6175

    
6176
    result = self.rpc.call_finalize_migration(target_node,
6177
                                              instance,
6178
                                              migration_info,
6179
                                              True)
6180
    msg = result.fail_msg
6181
    if msg:
6182
      logging.error("Instance migration succeeded, but finalization failed:"
6183
                    " %s", msg)
6184
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6185
                               msg)
6186

    
6187
    self._EnsureSecondary(source_node)
6188
    self._WaitUntilSync()
6189
    self._GoStandalone()
6190
    self._GoReconnect(False)
6191
    self._WaitUntilSync()
6192

    
6193
    self.feedback_fn("* done")
6194

    
6195
  def Exec(self, feedback_fn):
6196
    """Perform the migration.
6197

6198
    """
6199
    feedback_fn("Migrating instance %s" % self.instance.name)
6200

    
6201
    self.feedback_fn = feedback_fn
6202

    
6203
    self.source_node = self.instance.primary_node
6204
    self.target_node = self.instance.secondary_nodes[0]
6205
    self.all_nodes = [self.source_node, self.target_node]
6206
    self.nodes_ip = {
6207
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6208
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6209
      }
6210

    
6211
    if self.cleanup:
6212
      return self._ExecCleanup()
6213
    else:
6214
      return self._ExecMigration()
6215

    
6216

    
6217
def _CreateBlockDev(lu, node, instance, device, force_create,
6218
                    info, force_open):
6219
  """Create a tree of block devices on a given node.
6220

6221
  If this device type has to be created on secondaries, create it and
6222
  all its children.
6223

6224
  If not, just recurse to children keeping the same 'force' value.
6225

6226
  @param lu: the lu on whose behalf we execute
6227
  @param node: the node on which to create the device
6228
  @type instance: L{objects.Instance}
6229
  @param instance: the instance which owns the device
6230
  @type device: L{objects.Disk}
6231
  @param device: the device to create
6232
  @type force_create: boolean
6233
  @param force_create: whether to force creation of this device; this
6234
      will be change to True whenever we find a device which has
6235
      CreateOnSecondary() attribute
6236
  @param info: the extra 'metadata' we should attach to the device
6237
      (this will be represented as a LVM tag)
6238
  @type force_open: boolean
6239
  @param force_open: this parameter will be passes to the
6240
      L{backend.BlockdevCreate} function where it specifies
6241
      whether we run on primary or not, and it affects both
6242
      the child assembly and the device own Open() execution
6243

6244
  """
6245
  if device.CreateOnSecondary():
6246
    force_create = True
6247

    
6248
  if device.children:
6249
    for child in device.children:
6250
      _CreateBlockDev(lu, node, instance, child, force_create,
6251
                      info, force_open)
6252

    
6253
  if not force_create:
6254
    return
6255

    
6256
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6257

    
6258

    
6259
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6260
  """Create a single block device on a given node.
6261

6262
  This will not recurse over children of the device, so they must be
6263
  created in advance.
6264

6265
  @param lu: the lu on whose behalf we execute
6266
  @param node: the node on which to create the device
6267
  @type instance: L{objects.Instance}
6268
  @param instance: the instance which owns the device
6269
  @type device: L{objects.Disk}
6270
  @param device: the device to create
6271
  @param info: the extra 'metadata' we should attach to the device
6272
      (this will be represented as a LVM tag)
6273
  @type force_open: boolean
6274
  @param force_open: this parameter will be passes to the
6275
      L{backend.BlockdevCreate} function where it specifies
6276
      whether we run on primary or not, and it affects both
6277
      the child assembly and the device own Open() execution
6278

6279
  """
6280
  lu.cfg.SetDiskID(device, node)
6281
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6282
                                       instance.name, force_open, info)
6283
  result.Raise("Can't create block device %s on"
6284
               " node %s for instance %s" % (device, node, instance.name))
6285
  if device.physical_id is None:
6286
    device.physical_id = result.payload
6287

    
6288

    
6289
def _GenerateUniqueNames(lu, exts):
6290
  """Generate a suitable LV name.
6291

6292
  This will generate a logical volume name for the given instance.
6293

6294
  """
6295
  results = []
6296
  for val in exts:
6297
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6298
    results.append("%s%s" % (new_id, val))
6299
  return results
6300

    
6301

    
6302
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6303
                         p_minor, s_minor):
6304
  """Generate a drbd8 device complete with its children.
6305

6306
  """
6307
  port = lu.cfg.AllocatePort()
6308
  vgname = lu.cfg.GetVGName()
6309
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6310
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6311
                          logical_id=(vgname, names[0]))
6312
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6313
                          logical_id=(vgname, names[1]))
6314
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6315
                          logical_id=(primary, secondary, port,
6316
                                      p_minor, s_minor,
6317
                                      shared_secret),
6318
                          children=[dev_data, dev_meta],
6319
                          iv_name=iv_name)
6320
  return drbd_dev
6321

    
6322

    
6323
def _GenerateDiskTemplate(lu, template_name,
6324
                          instance_name, primary_node,
6325
                          secondary_nodes, disk_info,
6326
                          file_storage_dir, file_driver,
6327
                          base_index):
6328
  """Generate the entire disk layout for a given template type.
6329

6330
  """
6331
  #TODO: compute space requirements
6332

    
6333
  vgname = lu.cfg.GetVGName()
6334
  disk_count = len(disk_info)
6335
  disks = []
6336
  if template_name == constants.DT_DISKLESS:
6337
    pass
6338
  elif template_name == constants.DT_PLAIN:
6339
    if len(secondary_nodes) != 0:
6340
      raise errors.ProgrammerError("Wrong template configuration")
6341

    
6342
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6343
                                      for i in range(disk_count)])
6344
    for idx, disk in enumerate(disk_info):
6345
      disk_index = idx + base_index
6346
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6347
                              logical_id=(vgname, names[idx]),
6348
                              iv_name="disk/%d" % disk_index,
6349
                              mode=disk["mode"])
6350
      disks.append(disk_dev)
6351
  elif template_name == constants.DT_DRBD8:
6352
    if len(secondary_nodes) != 1:
6353
      raise errors.ProgrammerError("Wrong template configuration")
6354
    remote_node = secondary_nodes[0]
6355
    minors = lu.cfg.AllocateDRBDMinor(
6356
      [primary_node, remote_node] * len(disk_info), instance_name)
6357

    
6358
    names = []
6359
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6360
                                               for i in range(disk_count)]):
6361
      names.append(lv_prefix + "_data")
6362
      names.append(lv_prefix + "_meta")
6363
    for idx, disk in enumerate(disk_info):
6364
      disk_index = idx + base_index
6365
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6366
                                      disk["size"], names[idx*2:idx*2+2],
6367
                                      "disk/%d" % disk_index,
6368
                                      minors[idx*2], minors[idx*2+1])
6369
      disk_dev.mode = disk["mode"]
6370
      disks.append(disk_dev)
6371
  elif template_name == constants.DT_FILE:
6372
    if len(secondary_nodes) != 0:
6373
      raise errors.ProgrammerError("Wrong template configuration")
6374

    
6375
    _RequireFileStorage()
6376

    
6377
    for idx, disk in enumerate(disk_info):
6378
      disk_index = idx + base_index
6379
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6380
                              iv_name="disk/%d" % disk_index,
6381
                              logical_id=(file_driver,
6382
                                          "%s/disk%d" % (file_storage_dir,
6383
                                                         disk_index)),
6384
                              mode=disk["mode"])
6385
      disks.append(disk_dev)
6386
  else:
6387
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6388
  return disks
6389

    
6390

    
6391
def _GetInstanceInfoText(instance):
6392
  """Compute that text that should be added to the disk's metadata.
6393

6394
  """
6395
  return "originstname+%s" % instance.name
6396

    
6397

    
6398
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6399
  """Create all disks for an instance.
6400

6401
  This abstracts away some work from AddInstance.
6402

6403
  @type lu: L{LogicalUnit}
6404
  @param lu: the logical unit on whose behalf we execute
6405
  @type instance: L{objects.Instance}
6406
  @param instance: the instance whose disks we should create
6407
  @type to_skip: list
6408
  @param to_skip: list of indices to skip
6409
  @type target_node: string
6410
  @param target_node: if passed, overrides the target node for creation
6411
  @rtype: boolean
6412
  @return: the success of the creation
6413

6414
  """
6415
  info = _GetInstanceInfoText(instance)
6416
  if target_node is None:
6417
    pnode = instance.primary_node
6418
    all_nodes = instance.all_nodes
6419
  else:
6420
    pnode = target_node
6421
    all_nodes = [pnode]
6422

    
6423
  if instance.disk_template == constants.DT_FILE:
6424
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6425
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6426

    
6427
    result.Raise("Failed to create directory '%s' on"
6428
                 " node %s" % (file_storage_dir, pnode))
6429

    
6430
  # Note: this needs to be kept in sync with adding of disks in
6431
  # LUSetInstanceParams
6432
  for idx, device in enumerate(instance.disks):
6433
    if to_skip and idx in to_skip:
6434
      continue
6435
    logging.info("Creating volume %s for instance %s",
6436
                 device.iv_name, instance.name)
6437
    #HARDCODE
6438
    for node in all_nodes:
6439
      f_create = node == pnode
6440
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6441

    
6442

    
6443
def _RemoveDisks(lu, instance, target_node=None):
6444
  """Remove all disks for an instance.
6445

6446
  This abstracts away some work from `AddInstance()` and
6447
  `RemoveInstance()`. Note that in case some of the devices couldn't
6448
  be removed, the removal will continue with the other ones (compare
6449
  with `_CreateDisks()`).
6450

6451
  @type lu: L{LogicalUnit}
6452
  @param lu: the logical unit on whose behalf we execute
6453
  @type instance: L{objects.Instance}
6454
  @param instance: the instance whose disks we should remove
6455
  @type target_node: string
6456
  @param target_node: used to override the node on which to remove the disks
6457
  @rtype: boolean
6458
  @return: the success of the removal
6459

6460
  """
6461
  logging.info("Removing block devices for instance %s", instance.name)
6462

    
6463
  all_result = True
6464
  for device in instance.disks:
6465
    if target_node:
6466
      edata = [(target_node, device)]
6467
    else:
6468
      edata = device.ComputeNodeTree(instance.primary_node)
6469
    for node, disk in edata:
6470
      lu.cfg.SetDiskID(disk, node)
6471
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6472
      if msg:
6473
        lu.LogWarning("Could not remove block device %s on node %s,"
6474
                      " continuing anyway: %s", device.iv_name, node, msg)
6475
        all_result = False
6476

    
6477
  if instance.disk_template == constants.DT_FILE:
6478
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6479
    if target_node:
6480
      tgt = target_node
6481
    else:
6482
      tgt = instance.primary_node
6483
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6484
    if result.fail_msg:
6485
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6486
                    file_storage_dir, instance.primary_node, result.fail_msg)
6487
      all_result = False
6488

    
6489
  return all_result
6490

    
6491

    
6492
def _ComputeDiskSize(disk_template, disks):
6493
  """Compute disk size requirements in the volume group
6494

6495
  """
6496
  # Required free disk space as a function of disk and swap space
6497
  req_size_dict = {
6498
    constants.DT_DISKLESS: None,
6499
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6500
    # 128 MB are added for drbd metadata for each disk
6501
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6502
    constants.DT_FILE: None,
6503
  }
6504

    
6505
  if disk_template not in req_size_dict:
6506
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6507
                                 " is unknown" %  disk_template)
6508

    
6509
  return req_size_dict[disk_template]
6510

    
6511

    
6512
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6513
  """Hypervisor parameter validation.
6514

6515
  This function abstract the hypervisor parameter validation to be
6516
  used in both instance create and instance modify.
6517

6518
  @type lu: L{LogicalUnit}
6519
  @param lu: the logical unit for which we check
6520
  @type nodenames: list
6521
  @param nodenames: the list of nodes on which we should check
6522
  @type hvname: string
6523
  @param hvname: the name of the hypervisor we should use
6524
  @type hvparams: dict
6525
  @param hvparams: the parameters which we need to check
6526
  @raise errors.OpPrereqError: if the parameters are not valid
6527

6528
  """
6529
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6530
                                                  hvname,
6531
                                                  hvparams)
6532
  for node in nodenames:
6533
    info = hvinfo[node]
6534
    if info.offline:
6535
      continue
6536
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6537

    
6538

    
6539
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6540
  """OS parameters validation.
6541

6542
  @type lu: L{LogicalUnit}
6543
  @param lu: the logical unit for which we check
6544
  @type required: boolean
6545
  @param required: whether the validation should fail if the OS is not
6546
      found
6547
  @type nodenames: list
6548
  @param nodenames: the list of nodes on which we should check
6549
  @type osname: string
6550
  @param osname: the name of the hypervisor we should use
6551
  @type osparams: dict
6552
  @param osparams: the parameters which we need to check
6553
  @raise errors.OpPrereqError: if the parameters are not valid
6554

6555
  """
6556
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6557
                                   [constants.OS_VALIDATE_PARAMETERS],
6558
                                   osparams)
6559
  for node, nres in result.items():
6560
    # we don't check for offline cases since this should be run only
6561
    # against the master node and/or an instance's nodes
6562
    nres.Raise("OS Parameters validation failed on node %s" % node)
6563
    if not nres.payload:
6564
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6565
                 osname, node)
6566

    
6567

    
6568
class LUCreateInstance(LogicalUnit):
6569
  """Create an instance.
6570

6571
  """
6572
  HPATH = "instance-add"
6573
  HTYPE = constants.HTYPE_INSTANCE
6574
  _OP_PARAMS = [
6575
    _PInstanceName,
6576
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6577
    ("start", True, _TBool),
6578
    ("wait_for_sync", True, _TBool),
6579
    ("ip_check", True, _TBool),
6580
    ("name_check", True, _TBool),
6581
    ("disks", _NoDefault, _TListOf(_TDict)),
6582
    ("nics", _NoDefault, _TListOf(_TDict)),
6583
    ("hvparams", _EmptyDict, _TDict),
6584
    ("beparams", _EmptyDict, _TDict),
6585
    ("osparams", _EmptyDict, _TDict),
6586
    ("no_install", None, _TMaybeBool),
6587
    ("os_type", None, _TMaybeString),
6588
    ("force_variant", False, _TBool),
6589
    ("source_handshake", None, _TOr(_TList, _TNone)),
6590
    ("source_x509_ca", None, _TMaybeString),
6591
    ("source_instance_name", None, _TMaybeString),
6592
    ("src_node", None, _TMaybeString),
6593
    ("src_path", None, _TMaybeString),
6594
    ("pnode", None, _TMaybeString),
6595
    ("snode", None, _TMaybeString),
6596
    ("iallocator", None, _TMaybeString),
6597
    ("hypervisor", None, _TMaybeString),
6598
    ("disk_template", _NoDefault, _CheckDiskTemplate),
6599
    ("identify_defaults", False, _TBool),
6600
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6601
    ("file_storage_dir", None, _TMaybeString),
6602
    ]
6603
  REQ_BGL = False
6604

    
6605
  def CheckArguments(self):
6606
    """Check arguments.
6607

6608
    """
6609
    # do not require name_check to ease forward/backward compatibility
6610
    # for tools
6611
    if self.op.no_install and self.op.start:
6612
      self.LogInfo("No-installation mode selected, disabling startup")
6613
      self.op.start = False
6614
    # validate/normalize the instance name
6615
    self.op.instance_name = \
6616
      netutils.HostInfo.NormalizeName(self.op.instance_name)
6617

    
6618
    if self.op.ip_check and not self.op.name_check:
6619
      # TODO: make the ip check more flexible and not depend on the name check
6620
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6621
                                 errors.ECODE_INVAL)
6622

    
6623
    # check nics' parameter names
6624
    for nic in self.op.nics:
6625
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6626

    
6627
    # check disks. parameter names and consistent adopt/no-adopt strategy
6628
    has_adopt = has_no_adopt = False
6629
    for disk in self.op.disks:
6630
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6631
      if "adopt" in disk:
6632
        has_adopt = True
6633
      else:
6634
        has_no_adopt = True
6635
    if has_adopt and has_no_adopt:
6636
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6637
                                 errors.ECODE_INVAL)
6638
    if has_adopt:
6639
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6640
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6641
                                   " '%s' disk template" %
6642
                                   self.op.disk_template,
6643
                                   errors.ECODE_INVAL)
6644
      if self.op.iallocator is not None:
6645
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6646
                                   " iallocator script", errors.ECODE_INVAL)
6647
      if self.op.mode == constants.INSTANCE_IMPORT:
6648
        raise errors.OpPrereqError("Disk adoption not allowed for"
6649
                                   " instance import", errors.ECODE_INVAL)
6650

    
6651
    self.adopt_disks = has_adopt
6652

    
6653
    # instance name verification
6654
    if self.op.name_check:
6655
      self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6656
      self.op.instance_name = self.hostname1.name
6657
      # used in CheckPrereq for ip ping check
6658
      self.check_ip = self.hostname1.ip
6659
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6660
      raise errors.OpPrereqError("Remote imports require names to be checked" %
6661
                                 errors.ECODE_INVAL)
6662
    else:
6663
      self.check_ip = None
6664

    
6665
    # file storage checks
6666
    if (self.op.file_driver and
6667
        not self.op.file_driver in constants.FILE_DRIVER):
6668
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6669
                                 self.op.file_driver, errors.ECODE_INVAL)
6670

    
6671
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6672
      raise errors.OpPrereqError("File storage directory path not absolute",
6673
                                 errors.ECODE_INVAL)
6674

    
6675
    ### Node/iallocator related checks
6676
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6677

    
6678
    if self.op.pnode is not None:
6679
      if self.op.disk_template in constants.DTS_NET_MIRROR:
6680
        if self.op.snode is None:
6681
          raise errors.OpPrereqError("The networked disk templates need"
6682
                                     " a mirror node", errors.ECODE_INVAL)
6683
      elif self.op.snode:
6684
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6685
                        " template")
6686
        self.op.snode = None
6687

    
6688
    self._cds = _GetClusterDomainSecret()
6689

    
6690
    if self.op.mode == constants.INSTANCE_IMPORT:
6691
      # On import force_variant must be True, because if we forced it at
6692
      # initial install, our only chance when importing it back is that it
6693
      # works again!
6694
      self.op.force_variant = True
6695

    
6696
      if self.op.no_install:
6697
        self.LogInfo("No-installation mode has no effect during import")
6698

    
6699
    elif self.op.mode == constants.INSTANCE_CREATE:
6700
      if self.op.os_type is None:
6701
        raise errors.OpPrereqError("No guest OS specified",
6702
                                   errors.ECODE_INVAL)
6703
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_oss:
6704
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6705
                                   " installation" % self.op.os_type,
6706
                                   errors.ECODE_STATE)
6707
      if self.op.disk_template is None:
6708
        raise errors.OpPrereqError("No disk template specified",
6709
                                   errors.ECODE_INVAL)
6710

    
6711
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6712
      # Check handshake to ensure both clusters have the same domain secret
6713
      src_handshake = self.op.source_handshake
6714
      if not src_handshake:
6715
        raise errors.OpPrereqError("Missing source handshake",
6716
                                   errors.ECODE_INVAL)
6717

    
6718
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6719
                                                           src_handshake)
6720
      if errmsg:
6721
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6722
                                   errors.ECODE_INVAL)
6723

    
6724
      # Load and check source CA
6725
      self.source_x509_ca_pem = self.op.source_x509_ca
6726
      if not self.source_x509_ca_pem:
6727
        raise errors.OpPrereqError("Missing source X509 CA",
6728
                                   errors.ECODE_INVAL)
6729

    
6730
      try:
6731
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6732
                                                    self._cds)
6733
      except OpenSSL.crypto.Error, err:
6734
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6735
                                   (err, ), errors.ECODE_INVAL)
6736

    
6737
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6738
      if errcode is not None:
6739
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6740
                                   errors.ECODE_INVAL)
6741

    
6742
      self.source_x509_ca = cert
6743

    
6744
      src_instance_name = self.op.source_instance_name
6745
      if not src_instance_name:
6746
        raise errors.OpPrereqError("Missing source instance name",
6747
                                   errors.ECODE_INVAL)
6748

    
6749
      norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6750
      self.source_instance_name = netutils.GetHostInfo(norm_name).name
6751

    
6752
    else:
6753
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6754
                                 self.op.mode, errors.ECODE_INVAL)
6755

    
6756
  def ExpandNames(self):
6757
    """ExpandNames for CreateInstance.
6758

6759
    Figure out the right locks for instance creation.
6760

6761
    """
6762
    self.needed_locks = {}
6763

    
6764
    instance_name = self.op.instance_name
6765
    # this is just a preventive check, but someone might still add this
6766
    # instance in the meantime, and creation will fail at lock-add time
6767
    if instance_name in self.cfg.GetInstanceList():
6768
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6769
                                 instance_name, errors.ECODE_EXISTS)
6770

    
6771
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6772

    
6773
    if self.op.iallocator:
6774
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6775
    else:
6776
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6777
      nodelist = [self.op.pnode]
6778
      if self.op.snode is not None:
6779
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6780
        nodelist.append(self.op.snode)
6781
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6782

    
6783
    # in case of import lock the source node too
6784
    if self.op.mode == constants.INSTANCE_IMPORT:
6785
      src_node = self.op.src_node
6786
      src_path = self.op.src_path
6787

    
6788
      if src_path is None:
6789
        self.op.src_path = src_path = self.op.instance_name
6790

    
6791
      if src_node is None:
6792
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6793
        self.op.src_node = None
6794
        if os.path.isabs(src_path):
6795
          raise errors.OpPrereqError("Importing an instance from an absolute"
6796
                                     " path requires a source node option.",
6797
                                     errors.ECODE_INVAL)
6798
      else:
6799
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6800
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6801
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6802
        if not os.path.isabs(src_path):
6803
          self.op.src_path = src_path = \
6804
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6805

    
6806
  def _RunAllocator(self):
6807
    """Run the allocator based on input opcode.
6808

6809
    """
6810
    nics = [n.ToDict() for n in self.nics]
6811
    ial = IAllocator(self.cfg, self.rpc,
6812
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6813
                     name=self.op.instance_name,
6814
                     disk_template=self.op.disk_template,
6815
                     tags=[],
6816
                     os=self.op.os_type,
6817
                     vcpus=self.be_full[constants.BE_VCPUS],
6818
                     mem_size=self.be_full[constants.BE_MEMORY],
6819
                     disks=self.disks,
6820
                     nics=nics,
6821
                     hypervisor=self.op.hypervisor,
6822
                     )
6823

    
6824
    ial.Run(self.op.iallocator)
6825

    
6826
    if not ial.success:
6827
      raise errors.OpPrereqError("Can't compute nodes using"
6828
                                 " iallocator '%s': %s" %
6829
                                 (self.op.iallocator, ial.info),
6830
                                 errors.ECODE_NORES)
6831
    if len(ial.result) != ial.required_nodes:
6832
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6833
                                 " of nodes (%s), required %s" %
6834
                                 (self.op.iallocator, len(ial.result),
6835
                                  ial.required_nodes), errors.ECODE_FAULT)
6836
    self.op.pnode = ial.result[0]
6837
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6838
                 self.op.instance_name, self.op.iallocator,
6839
                 utils.CommaJoin(ial.result))
6840
    if ial.required_nodes == 2:
6841
      self.op.snode = ial.result[1]
6842

    
6843
  def BuildHooksEnv(self):
6844
    """Build hooks env.
6845

6846
    This runs on master, primary and secondary nodes of the instance.
6847

6848
    """
6849
    env = {
6850
      "ADD_MODE": self.op.mode,
6851
      }
6852
    if self.op.mode == constants.INSTANCE_IMPORT:
6853
      env["SRC_NODE"] = self.op.src_node
6854
      env["SRC_PATH"] = self.op.src_path
6855
      env["SRC_IMAGES"] = self.src_images
6856

    
6857
    env.update(_BuildInstanceHookEnv(
6858
      name=self.op.instance_name,
6859
      primary_node=self.op.pnode,
6860
      secondary_nodes=self.secondaries,
6861
      status=self.op.start,
6862
      os_type=self.op.os_type,
6863
      memory=self.be_full[constants.BE_MEMORY],
6864
      vcpus=self.be_full[constants.BE_VCPUS],
6865
      nics=_NICListToTuple(self, self.nics),
6866
      disk_template=self.op.disk_template,
6867
      disks=[(d["size"], d["mode"]) for d in self.disks],
6868
      bep=self.be_full,
6869
      hvp=self.hv_full,
6870
      hypervisor_name=self.op.hypervisor,
6871
    ))
6872

    
6873
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6874
          self.secondaries)
6875
    return env, nl, nl
6876

    
6877
  def _ReadExportInfo(self):
6878
    """Reads the export information from disk.
6879

6880
    It will override the opcode source node and path with the actual
6881
    information, if these two were not specified before.
6882

6883
    @return: the export information
6884

6885
    """
6886
    assert self.op.mode == constants.INSTANCE_IMPORT
6887

    
6888
    src_node = self.op.src_node
6889
    src_path = self.op.src_path
6890

    
6891
    if src_node is None:
6892
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6893
      exp_list = self.rpc.call_export_list(locked_nodes)
6894
      found = False
6895
      for node in exp_list:
6896
        if exp_list[node].fail_msg:
6897
          continue
6898
        if src_path in exp_list[node].payload:
6899
          found = True
6900
          self.op.src_node = src_node = node
6901
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6902
                                                       src_path)
6903
          break
6904
      if not found:
6905
        raise errors.OpPrereqError("No export found for relative path %s" %
6906
                                    src_path, errors.ECODE_INVAL)
6907

    
6908
    _CheckNodeOnline(self, src_node)
6909
    result = self.rpc.call_export_info(src_node, src_path)
6910
    result.Raise("No export or invalid export found in dir %s" % src_path)
6911

    
6912
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6913
    if not export_info.has_section(constants.INISECT_EXP):
6914
      raise errors.ProgrammerError("Corrupted export config",
6915
                                   errors.ECODE_ENVIRON)
6916

    
6917
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6918
    if (int(ei_version) != constants.EXPORT_VERSION):
6919
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6920
                                 (ei_version, constants.EXPORT_VERSION),
6921
                                 errors.ECODE_ENVIRON)
6922
    return export_info
6923

    
6924
  def _ReadExportParams(self, einfo):
6925
    """Use export parameters as defaults.
6926

6927
    In case the opcode doesn't specify (as in override) some instance
6928
    parameters, then try to use them from the export information, if
6929
    that declares them.
6930

6931
    """
6932
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6933

    
6934
    if self.op.disk_template is None:
6935
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6936
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6937
                                          "disk_template")
6938
      else:
6939
        raise errors.OpPrereqError("No disk template specified and the export"
6940
                                   " is missing the disk_template information",
6941
                                   errors.ECODE_INVAL)
6942

    
6943
    if not self.op.disks:
6944
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6945
        disks = []
6946
        # TODO: import the disk iv_name too
6947
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6948
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6949
          disks.append({"size": disk_sz})
6950
        self.op.disks = disks
6951
      else:
6952
        raise errors.OpPrereqError("No disk info specified and the export"
6953
                                   " is missing the disk information",
6954
                                   errors.ECODE_INVAL)
6955

    
6956
    if (not self.op.nics and
6957
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6958
      nics = []
6959
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6960
        ndict = {}
6961
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6962
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6963
          ndict[name] = v
6964
        nics.append(ndict)
6965
      self.op.nics = nics
6966

    
6967
    if (self.op.hypervisor is None and
6968
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6969
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6970
    if einfo.has_section(constants.INISECT_HYP):
6971
      # use the export parameters but do not override the ones
6972
      # specified by the user
6973
      for name, value in einfo.items(constants.INISECT_HYP):
6974
        if name not in self.op.hvparams:
6975
          self.op.hvparams[name] = value
6976

    
6977
    if einfo.has_section(constants.INISECT_BEP):
6978
      # use the parameters, without overriding
6979
      for name, value in einfo.items(constants.INISECT_BEP):
6980
        if name not in self.op.beparams:
6981
          self.op.beparams[name] = value
6982
    else:
6983
      # try to read the parameters old style, from the main section
6984
      for name in constants.BES_PARAMETERS:
6985
        if (name not in self.op.beparams and
6986
            einfo.has_option(constants.INISECT_INS, name)):
6987
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6988

    
6989
    if einfo.has_section(constants.INISECT_OSP):
6990
      # use the parameters, without overriding
6991
      for name, value in einfo.items(constants.INISECT_OSP):
6992
        if name not in self.op.osparams:
6993
          self.op.osparams[name] = value
6994

    
6995
  def _RevertToDefaults(self, cluster):
6996
    """Revert the instance parameters to the default values.
6997

6998
    """
6999
    # hvparams
7000
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7001
    for name in self.op.hvparams.keys():
7002
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7003
        del self.op.hvparams[name]
7004
    # beparams
7005
    be_defs = cluster.SimpleFillBE({})
7006
    for name in self.op.beparams.keys():
7007
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7008
        del self.op.beparams[name]
7009
    # nic params
7010
    nic_defs = cluster.SimpleFillNIC({})
7011
    for nic in self.op.nics:
7012
      for name in constants.NICS_PARAMETERS:
7013
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7014
          del nic[name]
7015
    # osparams
7016
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7017
    for name in self.op.osparams.keys():
7018
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7019
        del self.op.osparams[name]
7020

    
7021
  def CheckPrereq(self):
7022
    """Check prerequisites.
7023

7024
    """
7025
    if self.op.mode == constants.INSTANCE_IMPORT:
7026
      export_info = self._ReadExportInfo()
7027
      self._ReadExportParams(export_info)
7028

    
7029
    _CheckDiskTemplate(self.op.disk_template)
7030

    
7031
    if (not self.cfg.GetVGName() and
7032
        self.op.disk_template not in constants.DTS_NOT_LVM):
7033
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7034
                                 " instances", errors.ECODE_STATE)
7035

    
7036
    if self.op.hypervisor is None:
7037
      self.op.hypervisor = self.cfg.GetHypervisorType()
7038

    
7039
    cluster = self.cfg.GetClusterInfo()
7040
    enabled_hvs = cluster.enabled_hypervisors
7041
    if self.op.hypervisor not in enabled_hvs:
7042
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7043
                                 " cluster (%s)" % (self.op.hypervisor,
7044
                                  ",".join(enabled_hvs)),
7045
                                 errors.ECODE_STATE)
7046

    
7047
    # check hypervisor parameter syntax (locally)
7048
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7049
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7050
                                      self.op.hvparams)
7051
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7052
    hv_type.CheckParameterSyntax(filled_hvp)
7053
    self.hv_full = filled_hvp
7054
    # check that we don't specify global parameters on an instance
7055
    _CheckGlobalHvParams(self.op.hvparams)
7056

    
7057
    # fill and remember the beparams dict
7058
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7059
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7060

    
7061
    # build os parameters
7062
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7063

    
7064
    # now that hvp/bep are in final format, let's reset to defaults,
7065
    # if told to do so
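    # (with identify_defaults set, any value that matches the current
    # cluster default is dropped from the opcode by _RevertToDefaults, so
    # it stays inherited rather than being stored as an instance-level
    # override)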
7066
    if self.op.identify_defaults:
7067
      self._RevertToDefaults(cluster)
7068

    
7069
    # NIC buildup
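    # each entry of self.op.nics is a dict that may carry "mode", "ip",
    # "mac" and either "link" or "bridge"; the loop below validates these
    # and turns every entry into an objects.NIC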
7070
    self.nics = []
7071
    for idx, nic in enumerate(self.op.nics):
7072
      nic_mode_req = nic.get("mode", None)
7073
      nic_mode = nic_mode_req
7074
      if nic_mode is None:
7075
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7076

    
7077
      # in routed mode, for the first nic, the default ip is 'auto'
7078
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7079
        default_ip_mode = constants.VALUE_AUTO
7080
      else:
7081
        default_ip_mode = constants.VALUE_NONE
7082

    
7083
      # ip validity checks
7084
      ip = nic.get("ip", default_ip_mode)
7085
      if ip is None or ip.lower() == constants.VALUE_NONE:
7086
        nic_ip = None
7087
      elif ip.lower() == constants.VALUE_AUTO:
7088
        if not self.op.name_check:
7089
          raise errors.OpPrereqError("IP address set to auto but name checks"
7090
                                     " have been skipped. Aborting.",
7091
                                     errors.ECODE_INVAL)
7092
        nic_ip = self.hostname1.ip
7093
      else:
7094
        if not netutils.IsValidIP4(ip):
7095
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7096
                                     " like a valid IP" % ip,
7097
                                     errors.ECODE_INVAL)
7098
        nic_ip = ip
7099

    
7100
      # TODO: check the ip address for uniqueness
7101
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7102
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7103
                                   errors.ECODE_INVAL)
7104

    
7105
      # MAC address verification
7106
      mac = nic.get("mac", constants.VALUE_AUTO)
7107
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7108
        mac = utils.NormalizeAndValidateMac(mac)
7109

    
7110
        try:
7111
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7112
        except errors.ReservationError:
7113
          raise errors.OpPrereqError("MAC address %s already in use"
7114
                                     " in cluster" % mac,
7115
                                     errors.ECODE_NOTUNIQUE)
7116

    
7117
      # bridge verification
7118
      bridge = nic.get("bridge", None)
7119
      link = nic.get("link", None)
7120
      if bridge and link:
7121
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7122
                                   " at the same time", errors.ECODE_INVAL)
7123
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7124
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7125
                                   errors.ECODE_INVAL)
7126
      elif bridge:
7127
        link = bridge
7128

    
7129
      nicparams = {}
7130
      if nic_mode_req:
7131
        nicparams[constants.NIC_MODE] = nic_mode_req
7132
      if link:
7133
        nicparams[constants.NIC_LINK] = link
7134

    
7135
      check_params = cluster.SimpleFillNIC(nicparams)
7136
      objects.NIC.CheckParameterSyntax(check_params)
7137
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7138

    
7139
    # disk checks/pre-build
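    # each entry of self.op.disks is a dict with a mandatory integer "size",
    # an optional "mode" (defaulting to read-write access) and, for disk
    # adoption, an optional "adopt" key naming an existing logical volume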
7140
    self.disks = []
7141
    for disk in self.op.disks:
7142
      mode = disk.get("mode", constants.DISK_RDWR)
7143
      if mode not in constants.DISK_ACCESS_SET:
7144
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7145
                                   mode, errors.ECODE_INVAL)
7146
      size = disk.get("size", None)
7147
      if size is None:
7148
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7149
      try:
7150
        size = int(size)
7151
      except (TypeError, ValueError):
7152
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7153
                                   errors.ECODE_INVAL)
7154
      new_disk = {"size": size, "mode": mode}
7155
      if "adopt" in disk:
7156
        new_disk["adopt"] = disk["adopt"]
7157
      self.disks.append(new_disk)
7158

    
7159
    if self.op.mode == constants.INSTANCE_IMPORT:
7160

    
7161
      # Check that the new instance doesn't have less disks than the export
7162
      instance_disks = len(self.disks)
7163
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7164
      if instance_disks < export_disks:
7165
        raise errors.OpPrereqError("Not enough disks to import."
7166
                                   " (instance: %d, export: %d)" %
7167
                                   (instance_disks, export_disks),
7168
                                   errors.ECODE_INVAL)
7169

    
7170
      disk_images = []
7171
      for idx in range(export_disks):
7172
        option = 'disk%d_dump' % idx
7173
        if export_info.has_option(constants.INISECT_INS, option):
7174
          # FIXME: are the old os-es, disk sizes, etc. useful?
7175
          export_name = export_info.get(constants.INISECT_INS, option)
7176
          image = utils.PathJoin(self.op.src_path, export_name)
7177
          disk_images.append(image)
7178
        else:
7179
          disk_images.append(False)
7180

    
7181
      self.src_images = disk_images
7182

    
7183
      old_name = export_info.get(constants.INISECT_INS, 'name')
7184
      try:
7185
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7186
      except (TypeError, ValueError), err:
7187
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7188
                                   " an integer: %s" % str(err),
7189
                                   errors.ECODE_STATE)
7190
      if self.op.instance_name == old_name:
7191
        for idx, nic in enumerate(self.nics):
7192
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7193
            nic_mac_ini = 'nic%d_mac' % idx
7194
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7195

    
7196
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7197

    
7198
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7199
    if self.op.ip_check:
7200
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7201
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7202
                                   (self.check_ip, self.op.instance_name),
7203
                                   errors.ECODE_NOTUNIQUE)
7204

    
7205
    #### mac address generation
7206
    # By generating here the mac address both the allocator and the hooks get
7207
    # the real final mac address rather than the 'auto' or 'generate' value.
7208
    # There is a race condition between the generation and the instance object
7209
    # creation, which means that we know the mac is valid now, but we're not
7210
    # sure it will be when we actually add the instance. If things go bad
7211
    # adding the instance will abort because of a duplicate mac, and the
7212
    # creation job will fail.
7213
    for nic in self.nics:
7214
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7215
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7216

    
7217
    #### allocator run
7218

    
7219
    if self.op.iallocator is not None:
7220
      self._RunAllocator()
7221

    
7222
    #### node related checks
7223

    
7224
    # check primary node
7225
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7226
    assert self.pnode is not None, \
7227
      "Cannot retrieve locked node %s" % self.op.pnode
7228
    if pnode.offline:
7229
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7230
                                 pnode.name, errors.ECODE_STATE)
7231
    if pnode.drained:
7232
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7233
                                 pnode.name, errors.ECODE_STATE)
7234

    
7235
    self.secondaries = []
7236

    
7237
    # mirror node verification
7238
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7239
      if self.op.snode == pnode.name:
7240
        raise errors.OpPrereqError("The secondary node cannot be the"
7241
                                   " primary node.", errors.ECODE_INVAL)
7242
      _CheckNodeOnline(self, self.op.snode)
7243
      _CheckNodeNotDrained(self, self.op.snode)
7244
      self.secondaries.append(self.op.snode)
7245

    
7246
    nodenames = [pnode.name] + self.secondaries
7247

    
7248
    req_size = _ComputeDiskSize(self.op.disk_template,
7249
                                self.disks)
7250

    
7251
    # Check lv size requirements, if not adopting
7252
    if req_size is not None and not self.adopt_disks:
7253
      _CheckNodesFreeDisk(self, nodenames, req_size)
7254

    
7255
    if self.adopt_disks: # instead, we must check the adoption data
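      # summary of the adoption checks below: the requested LV names must be
      # unique, not already reserved by another job, present on the primary
      # node and not currently online; the disk sizes are then taken from
      # the real LVs rather than from the opcode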
7256
      all_lvs = set([i["adopt"] for i in self.disks])
7257
      if len(all_lvs) != len(self.disks):
7258
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7259
                                   errors.ECODE_INVAL)
7260
      for lv_name in all_lvs:
7261
        try:
7262
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7263
        except errors.ReservationError:
7264
          raise errors.OpPrereqError("LV named %s used by another instance" %
7265
                                     lv_name, errors.ECODE_NOTUNIQUE)
7266

    
7267
      node_lvs = self.rpc.call_lv_list([pnode.name],
7268
                                       self.cfg.GetVGName())[pnode.name]
7269
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7270
      node_lvs = node_lvs.payload
7271
      delta = all_lvs.difference(node_lvs.keys())
7272
      if delta:
7273
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7274
                                   utils.CommaJoin(delta),
7275
                                   errors.ECODE_INVAL)
7276
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7277
      if online_lvs:
7278
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7279
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7280
                                   errors.ECODE_STATE)
7281
      # update the size of disk based on what is found
7282
      for dsk in self.disks:
7283
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7284

    
7285
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7286

    
7287
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7288
    # check OS parameters (remotely)
7289
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7290

    
7291
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7292

    
7293
    # memory check on primary node
7294
    if self.op.start:
7295
      _CheckNodeFreeMemory(self, self.pnode.name,
7296
                           "creating instance %s" % self.op.instance_name,
7297
                           self.be_full[constants.BE_MEMORY],
7298
                           self.op.hypervisor)
7299

    
7300
    self.dry_run_result = list(nodenames)
7301

    
7302
  def Exec(self, feedback_fn):
7303
    """Create and add the instance to the cluster.
7304

7305
    """
7306
    instance = self.op.instance_name
7307
    pnode_name = self.pnode.name
7308

    
7309
    ht_kind = self.op.hypervisor
7310
    if ht_kind in constants.HTS_REQ_PORT:
7311
      network_port = self.cfg.AllocatePort()
7312
    else:
7313
      network_port = None
7314

    
7315
    if constants.ENABLE_FILE_STORAGE:
7316
      # this is needed because os.path.join does not accept None arguments
7317
      if self.op.file_storage_dir is None:
7318
        string_file_storage_dir = ""
7319
      else:
7320
        string_file_storage_dir = self.op.file_storage_dir
7321

    
7322
      # build the full file storage dir path
7323
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7324
                                        string_file_storage_dir, instance)
7325
    else:
7326
      file_storage_dir = ""
7327

    
7328
    disks = _GenerateDiskTemplate(self,
7329
                                  self.op.disk_template,
7330
                                  instance, pnode_name,
7331
                                  self.secondaries,
7332
                                  self.disks,
7333
                                  file_storage_dir,
7334
                                  self.op.file_driver,
7335
                                  0)
7336

    
7337
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7338
                            primary_node=pnode_name,
7339
                            nics=self.nics, disks=disks,
7340
                            disk_template=self.op.disk_template,
7341
                            admin_up=False,
7342
                            network_port=network_port,
7343
                            beparams=self.op.beparams,
7344
                            hvparams=self.op.hvparams,
7345
                            hypervisor=self.op.hypervisor,
7346
                            osparams=self.op.osparams,
7347
                            )
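    # note that the instance object is created with admin_up=False; it is
    # only marked as up (and actually started) at the end of Exec if
    # self.op.start is set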
7348

    
7349
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
7362
      feedback_fn("* creating instance disks...")
7363
      try:
7364
        _CreateDisks(self, iobj)
7365
      except errors.OpExecError:
7366
        self.LogWarning("Device creation failed, reverting...")
7367
        try:
7368
          _RemoveDisks(self, iobj)
7369
        finally:
7370
          self.cfg.ReleaseDRBDMinors(instance)
7371
          raise
7372

    
7373
    feedback_fn("adding instance %s to cluster config" % instance)
7374

    
7375
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7376

    
7377
    # Declare that we don't want to remove the instance lock anymore, as we've
7378
    # added the instance to the config
7379
    del self.remove_locks[locking.LEVEL_INSTANCE]
7380
    # Unlock all the nodes
7381
    if self.op.mode == constants.INSTANCE_IMPORT:
7382
      nodes_keep = [self.op.src_node]
7383
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7384
                       if node != self.op.src_node]
7385
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7386
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7387
    else:
7388
      self.context.glm.release(locking.LEVEL_NODE)
7389
      del self.acquired_locks[locking.LEVEL_NODE]
7390

    
7391
    if self.op.wait_for_sync:
7392
      disk_abort = not _WaitForSync(self, iobj)
7393
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7394
      # make sure the disks are not degraded (still sync-ing is ok)
7395
      time.sleep(15)
7396
      feedback_fn("* checking mirrors status")
7397
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7398
    else:
7399
      disk_abort = False
7400

    
7401
    if disk_abort:
7402
      _RemoveDisks(self, iobj)
7403
      self.cfg.RemoveInstance(iobj.name)
7404
      # Make sure the instance lock gets removed
7405
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7406
      raise errors.OpExecError("There are some degraded disks for"
7407
                               " this instance")
7408

    
7409
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7410
      if self.op.mode == constants.INSTANCE_CREATE:
7411
        if not self.op.no_install:
7412
          feedback_fn("* running the instance OS create scripts...")
7413
          # FIXME: pass debug option from opcode to backend
7414
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7415
                                                 self.op.debug_level)
7416
          result.Raise("Could not add os for instance %s"
7417
                       " on node %s" % (instance, pnode_name))
7418

    
7419
      elif self.op.mode == constants.INSTANCE_IMPORT:
7420
        feedback_fn("* running the instance OS import scripts...")
7421

    
7422
        transfers = []
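        # build one DiskTransfer per dump file found in the export; disks
        # for which the export has no image (False entries in
        # self.src_images) are simply skipped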
7423

    
7424
        for idx, image in enumerate(self.src_images):
7425
          if not image:
7426
            continue
7427

    
7428
          # FIXME: pass debug option from opcode to backend
7429
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7430
                                             constants.IEIO_FILE, (image, ),
7431
                                             constants.IEIO_SCRIPT,
7432
                                             (iobj.disks[idx], idx),
7433
                                             None)
7434
          transfers.append(dt)
7435

    
7436
        import_result = \
7437
          masterd.instance.TransferInstanceData(self, feedback_fn,
7438
                                                self.op.src_node, pnode_name,
7439
                                                self.pnode.secondary_ip,
7440
                                                iobj, transfers)
7441
        if not compat.all(import_result):
7442
          self.LogWarning("Some disks for instance %s on node %s were not"
7443
                          " imported successfully" % (instance, pnode_name))
7444

    
7445
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7446
        feedback_fn("* preparing remote import...")
7447
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7448
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7449

    
7450
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7451
                                                     self.source_x509_ca,
7452
                                                     self._cds, timeouts)
7453
        if not compat.all(disk_results):
7454
          # TODO: Should the instance still be started, even if some disks
7455
          # failed to import (valid for local imports, too)?
7456
          self.LogWarning("Some disks for instance %s on node %s were not"
7457
                          " imported successfully" % (instance, pnode_name))
7458

    
7459
        # Run rename script on newly imported instance
7460
        assert iobj.name == instance
7461
        feedback_fn("Running rename script for %s" % instance)
7462
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7463
                                                   self.source_instance_name,
7464
                                                   self.op.debug_level)
7465
        if result.fail_msg:
7466
          self.LogWarning("Failed to run rename script for %s on node"
7467
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7468

    
7469
      else:
7470
        # also checked in the prereq part
7471
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7472
                                     % self.op.mode)
7473

    
7474
    if self.op.start:
7475
      iobj.admin_up = True
7476
      self.cfg.Update(iobj, feedback_fn)
7477
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7478
      feedback_fn("* starting instance...")
7479
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7480
      result.Raise("Could not start instance")
7481

    
7482
    return list(iobj.all_nodes)
7483

    
7484

    
7485
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


7540
class LUReplaceDisks(LogicalUnit):
7541
  """Replace the disks of an instance.
7542

7543
  """
7544
  HPATH = "mirrors-replace"
7545
  HTYPE = constants.HTYPE_INSTANCE
7546
  _OP_PARAMS = [
7547
    _PInstanceName,
7548
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7549
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7550
    ("remote_node", None, _TMaybeString),
7551
    ("iallocator", None, _TMaybeString),
7552
    ("early_release", False, _TBool),
7553
    ]
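  # each _OP_PARAMS entry above is a (name, default value, validity check)
  # triple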
7554
  REQ_BGL = False
7555

    
7556
  def CheckArguments(self):
7557
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7558
                                  self.op.iallocator)
7559

    
7560
  def ExpandNames(self):
7561
    self._ExpandAndLockInstance()
7562

    
7563
    if self.op.iallocator is not None:
7564
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7565

    
7566
    elif self.op.remote_node is not None:
7567
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7568
      self.op.remote_node = remote_node
7569

    
7570
      # Warning: do not remove the locking of the new secondary here
7571
      # unless DRBD8.AddChildren is changed to work in parallel;
7572
      # currently it doesn't since parallel invocations of
7573
      # FindUnusedMinor will conflict
7574
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7575
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7576

    
7577
    else:
7578
      self.needed_locks[locking.LEVEL_NODE] = []
7579
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7580

    
7581
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7582
                                   self.op.iallocator, self.op.remote_node,
7583
                                   self.op.disks, False, self.op.early_release)
7584

    
7585
    self.tasklets = [self.replacer]
7586

    
7587
  def DeclareLocks(self, level):
7588
    # If we're not already locking all nodes in the set we have to declare the
7589
    # instance's primary/secondary nodes.
7590
    if (level == locking.LEVEL_NODE and
7591
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7592
      self._LockInstancesNodes()
7593

    
7594
  def BuildHooksEnv(self):
7595
    """Build hooks env.
7596

7597
    This runs on the master, the primary and all the secondaries.
7598

7599
    """
7600
    instance = self.replacer.instance
7601
    env = {
7602
      "MODE": self.op.mode,
7603
      "NEW_SECONDARY": self.op.remote_node,
7604
      "OLD_SECONDARY": instance.secondary_nodes[0],
7605
      }
7606
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7607
    nl = [
7608
      self.cfg.GetMasterNode(),
7609
      instance.primary_node,
7610
      ]
7611
    if self.op.remote_node is not None:
7612
      nl.append(self.op.remote_node)
7613
    return env, nl, nl
7614

    
7615

    
7616
class TLReplaceDisks(Tasklet):
7617
  """Replaces disks for an instance.
7618

7619
  Note: Locking is not within the scope of this class.
7620

7621
  """
7622
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7623
               disks, delay_iallocator, early_release):
7624
    """Initializes this class.
7625

7626
    """
7627
    Tasklet.__init__(self, lu)
7628

    
7629
    # Parameters
7630
    self.instance_name = instance_name
7631
    self.mode = mode
7632
    self.iallocator_name = iallocator_name
7633
    self.remote_node = remote_node
7634
    self.disks = disks
7635
    self.delay_iallocator = delay_iallocator
7636
    self.early_release = early_release
7637

    
7638
    # Runtime data
7639
    self.instance = None
7640
    self.new_node = None
7641
    self.target_node = None
7642
    self.other_node = None
7643
    self.remote_node_info = None
7644
    self.node_secondary_ip = None
7645

    
7646
  @staticmethod
7647
  def CheckArguments(mode, remote_node, iallocator):
7648
    """Helper function for users of this class.
7649

7650
    """
7651
    # check for valid parameter combination
7652
    if mode == constants.REPLACE_DISK_CHG:
7653
      if remote_node is None and iallocator is None:
7654
        raise errors.OpPrereqError("When changing the secondary either an"
7655
                                   " iallocator script must be used or the"
7656
                                   " new node given", errors.ECODE_INVAL)
7657

    
7658
      if remote_node is not None and iallocator is not None:
7659
        raise errors.OpPrereqError("Give either the iallocator or the new"
7660
                                   " secondary, not both", errors.ECODE_INVAL)
7661

    
7662
    elif remote_node is not None or iallocator is not None:
7663
      # Not replacing the secondary
7664
      raise errors.OpPrereqError("The iallocator and new node options can"
7665
                                 " only be used when changing the"
7666
                                 " secondary node", errors.ECODE_INVAL)
7667

    
7668
  @staticmethod
7669
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7670
    """Compute a new secondary node using an IAllocator.
7671

7672
    """
7673
    ial = IAllocator(lu.cfg, lu.rpc,
7674
                     mode=constants.IALLOCATOR_MODE_RELOC,
7675
                     name=instance_name,
7676
                     relocate_from=relocate_from)
7677

    
7678
    ial.Run(iallocator_name)
7679

    
7680
    if not ial.success:
7681
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7682
                                 " %s" % (iallocator_name, ial.info),
7683
                                 errors.ECODE_NORES)
7684

    
7685
    if len(ial.result) != ial.required_nodes:
7686
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7687
                                 " of nodes (%s), required %s" %
7688
                                 (iallocator_name,
7689
                                  len(ial.result), ial.required_nodes),
7690
                                 errors.ECODE_FAULT)
7691

    
7692
    remote_node_name = ial.result[0]
7693

    
7694
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7695
               instance_name, remote_node_name)
7696

    
7697
    return remote_node_name
7698

    
7699
  def _FindFaultyDisks(self, node_name):
7700
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7701
                                    node_name, True)
7702

    
7703
  def CheckPrereq(self):
7704
    """Check prerequisites.
7705

7706
    This checks that the instance is in the cluster.
7707

7708
    """
7709
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7710
    assert instance is not None, \
7711
      "Cannot retrieve locked instance %s" % self.instance_name
7712

    
7713
    if instance.disk_template != constants.DT_DRBD8:
7714
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7715
                                 " instances", errors.ECODE_INVAL)
7716

    
7717
    if len(instance.secondary_nodes) != 1:
7718
      raise errors.OpPrereqError("The instance has a strange layout,"
7719
                                 " expected one secondary but found %d" %
7720
                                 len(instance.secondary_nodes),
7721
                                 errors.ECODE_FAULT)
7722

    
7723
    if not self.delay_iallocator:
7724
      self._CheckPrereq2()
7725

    
7726
  def _CheckPrereq2(self):
7727
    """Check prerequisites, second part.
7728

7729
    This function should always be part of CheckPrereq. It was separated and is
7730
    now called from Exec because during node evacuation iallocator was only
7731
    called with an unmodified cluster model, not taking planned changes into
7732
    account.
7733

7734
    """
7735
    instance = self.instance
7736
    secondary_node = instance.secondary_nodes[0]
7737

    
7738
    if self.iallocator_name is None:
7739
      remote_node = self.remote_node
7740
    else:
7741
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7742
                                       instance.name, instance.secondary_nodes)
7743

    
7744
    if remote_node is not None:
7745
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7746
      assert self.remote_node_info is not None, \
7747
        "Cannot retrieve locked node %s" % remote_node
7748
    else:
7749
      self.remote_node_info = None
7750

    
7751
    if remote_node == self.instance.primary_node:
7752
      raise errors.OpPrereqError("The specified node is the primary node of"
7753
                                 " the instance.", errors.ECODE_INVAL)
7754

    
7755
    if remote_node == secondary_node:
7756
      raise errors.OpPrereqError("The specified node is already the"
7757
                                 " secondary node of the instance.",
7758
                                 errors.ECODE_INVAL)
7759

    
7760
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7761
                                    constants.REPLACE_DISK_CHG):
7762
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7763
                                 errors.ECODE_INVAL)
7764

    
7765
    if self.mode == constants.REPLACE_DISK_AUTO:
7766
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7767
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7768

    
7769
      if faulty_primary and faulty_secondary:
7770
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7771
                                   " one node and can not be repaired"
7772
                                   " automatically" % self.instance_name,
7773
                                   errors.ECODE_STATE)
7774

    
7775
      if faulty_primary:
7776
        self.disks = faulty_primary
7777
        self.target_node = instance.primary_node
7778
        self.other_node = secondary_node
7779
        check_nodes = [self.target_node, self.other_node]
7780
      elif faulty_secondary:
7781
        self.disks = faulty_secondary
7782
        self.target_node = secondary_node
7783
        self.other_node = instance.primary_node
7784
        check_nodes = [self.target_node, self.other_node]
7785
      else:
7786
        self.disks = []
7787
        check_nodes = []
7788

    
7789
    else:
7790
      # Non-automatic modes
7791
      if self.mode == constants.REPLACE_DISK_PRI:
7792
        self.target_node = instance.primary_node
7793
        self.other_node = secondary_node
7794
        check_nodes = [self.target_node, self.other_node]
7795

    
7796
      elif self.mode == constants.REPLACE_DISK_SEC:
7797
        self.target_node = secondary_node
7798
        self.other_node = instance.primary_node
7799
        check_nodes = [self.target_node, self.other_node]
7800

    
7801
      elif self.mode == constants.REPLACE_DISK_CHG:
7802
        self.new_node = remote_node
7803
        self.other_node = instance.primary_node
7804
        self.target_node = secondary_node
7805
        check_nodes = [self.new_node, self.other_node]
7806

    
7807
        _CheckNodeNotDrained(self.lu, remote_node)
7808

    
7809
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7810
        assert old_node_info is not None
7811
        if old_node_info.offline and not self.early_release:
7812
          # doesn't make sense to delay the release
7813
          self.early_release = True
7814
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7815
                          " early-release mode", secondary_node)
7816

    
7817
      else:
7818
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7819
                                     self.mode)
7820

    
7821
      # If not specified all disks should be replaced
7822
      if not self.disks:
7823
        self.disks = range(len(self.instance.disks))
7824

    
7825
    for node in check_nodes:
7826
      _CheckNodeOnline(self.lu, node)
7827

    
7828
    # Check whether disks are valid
7829
    for disk_idx in self.disks:
7830
      instance.FindDisk(disk_idx)
7831

    
7832
    # Get secondary node IP addresses
7833
    node_2nd_ip = {}
7834

    
7835
    for node_name in [self.target_node, self.other_node, self.new_node]:
7836
      if node_name is not None:
7837
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7838

    
7839
    self.node_secondary_ip = node_2nd_ip
7840

    
7841
  def Exec(self, feedback_fn):
7842
    """Execute disk replacement.
7843

7844
    This dispatches the disk replacement to the appropriate handler.
7845

7846
    """
7847
    if self.delay_iallocator:
7848
      self._CheckPrereq2()
7849

    
7850
    if not self.disks:
7851
      feedback_fn("No disks need replacement")
7852
      return
7853

    
7854
    feedback_fn("Replacing disk(s) %s for %s" %
7855
                (utils.CommaJoin(self.disks), self.instance.name))
7856

    
7857
    activate_disks = (not self.instance.admin_up)
7858

    
7859
    # Activate the instance disks if we're replacing them on a down instance
7860
    if activate_disks:
7861
      _StartInstanceDisks(self.lu, self.instance, True)
7862

    
7863
    try:
7864
      # Should we replace the secondary node?
7865
      if self.new_node is not None:
7866
        fn = self._ExecDrbd8Secondary
7867
      else:
7868
        fn = self._ExecDrbd8DiskOnly
7869

    
7870
      return fn(feedback_fn)
7871

    
7872
    finally:
7873
      # Deactivate the instance disks if we're replacing them on a
7874
      # down instance
7875
      if activate_disks:
7876
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7877

    
7878
  def _CheckVolumeGroup(self, nodes):
7879
    self.lu.LogInfo("Checking volume groups")
7880

    
7881
    vgname = self.cfg.GetVGName()
7882

    
7883
    # Make sure volume group exists on all involved nodes
7884
    results = self.rpc.call_vg_list(nodes)
7885
    if not results:
7886
      raise errors.OpExecError("Can't list volume groups on the nodes")
7887

    
7888
    for node in nodes:
7889
      res = results[node]
7890
      res.Raise("Error checking node %s" % node)
7891
      if vgname not in res.payload:
7892
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7893
                                 (vgname, node))
7894

    
7895
  def _CheckDisksExistence(self, nodes):
7896
    # Check disk existence
7897
    for idx, dev in enumerate(self.instance.disks):
7898
      if idx not in self.disks:
7899
        continue
7900

    
7901
      for node in nodes:
7902
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7903
        self.cfg.SetDiskID(dev, node)
7904

    
7905
        result = self.rpc.call_blockdev_find(node, dev)
7906

    
7907
        msg = result.fail_msg
7908
        if msg or not result.payload:
7909
          if not msg:
7910
            msg = "disk not found"
7911
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7912
                                   (idx, node, msg))
7913

    
7914
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7915
    for idx, dev in enumerate(self.instance.disks):
7916
      if idx not in self.disks:
7917
        continue
7918

    
7919
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7920
                      (idx, node_name))
7921

    
7922
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7923
                                   ldisk=ldisk):
7924
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7925
                                 " replace disks for instance %s" %
7926
                                 (node_name, self.instance.name))
7927

    
7928
  def _CreateNewStorage(self, node_name):
7929
    vgname = self.cfg.GetVGName()
7930
    iv_names = {}
7931

    
7932
    for idx, dev in enumerate(self.instance.disks):
7933
      if idx not in self.disks:
7934
        continue
7935

    
7936
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7937

    
7938
      self.cfg.SetDiskID(dev, node_name)
7939

    
7940
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7941
      names = _GenerateUniqueNames(self.lu, lv_names)
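      # every DRBD8 disk is backed by a pair of local LVs: a data LV of the
      # disk's size and a small fixed-size metadata LV, created below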
7942

    
7943
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7944
                             logical_id=(vgname, names[0]))
7945
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7946
                             logical_id=(vgname, names[1]))
7947

    
7948
      new_lvs = [lv_data, lv_meta]
7949
      old_lvs = dev.children
7950
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7951

    
7952
      # we pass force_create=True to force the LVM creation
7953
      for new_lv in new_lvs:
7954
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7955
                        _GetInstanceInfoText(self.instance), False)
7956

    
7957
    return iv_names
7958

    
7959
  def _CheckDevices(self, node_name, iv_names):
7960
    for name, (dev, _, _) in iv_names.iteritems():
7961
      self.cfg.SetDiskID(dev, node_name)
7962

    
7963
      result = self.rpc.call_blockdev_find(node_name, dev)
7964

    
7965
      msg = result.fail_msg
7966
      if msg or not result.payload:
7967
        if not msg:
7968
          msg = "disk not found"
7969
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7970
                                 (name, msg))
7971

    
7972
      if result.payload.is_degraded:
7973
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7974

    
7975
  def _RemoveOldStorage(self, node_name, iv_names):
7976
    for name, (_, old_lvs, _) in iv_names.iteritems():
7977
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7978

    
7979
      for lv in old_lvs:
7980
        self.cfg.SetDiskID(lv, node_name)
7981

    
7982
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7983
        if msg:
7984
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7985
                             hint="remove unused LVs manually")
7986

    
7987
  def _ReleaseNodeLock(self, node_name):
7988
    """Releases the lock for a given node."""
7989
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7990

    
7991
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7992
    """Replace a disk on the primary or secondary for DRBD 8.
7993

7994
    The algorithm for replace is quite complicated:
7995

7996
      1. for each disk to be replaced:
7997

7998
        1. create new LVs on the target node with unique names
7999
        1. detach old LVs from the drbd device
8000
        1. rename old LVs to name_replaced.<time_t>
8001
        1. rename new LVs to old LVs
8002
        1. attach the new LVs (with the old names now) to the drbd device
8003

8004
      1. wait for sync across all devices
8005

8006
      1. for each modified disk:
8007

8008
        1. remove old LVs (which have the name name_replaced.<time_t>)
8009

8010
    Failures are not very well handled.
8011

8012
    """
8013
    steps_total = 6
8014

    
8015
    # Step: check device activation
8016
    self.lu.LogStep(1, steps_total, "Check device existence")
8017
    self._CheckDisksExistence([self.other_node, self.target_node])
8018
    self._CheckVolumeGroup([self.target_node, self.other_node])
8019

    
8020
    # Step: check other node consistency
8021
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8022
    self._CheckDisksConsistency(self.other_node,
8023
                                self.other_node == self.instance.primary_node,
8024
                                False)
8025

    
8026
    # Step: create new storage
8027
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8028
    iv_names = self._CreateNewStorage(self.target_node)
8029

    
8030
    # Step: for each lv, detach+rename*2+attach
8031
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8032
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8033
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8034

    
8035
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8036
                                                     old_lvs)
8037
      result.Raise("Can't detach drbd from local storage on node"
8038
                   " %s for device %s" % (self.target_node, dev.iv_name))
8039
      #dev.children = []
8040
      #cfg.Update(instance)
8041

    
8042
      # ok, we created the new LVs, so now we know we have the needed
8043
      # storage; as such, we proceed on the target node to rename
8044
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8045
      # using the assumption that logical_id == physical_id (which in
8046
      # turn is the unique_id on that node)
8047

    
8048
      # FIXME(iustin): use a better name for the replaced LVs
8049
      temp_suffix = int(time.time())
8050
      ren_fn = lambda d, suff: (d.physical_id[0],
8051
                                d.physical_id[1] + "_replaced-%s" % suff)
8052

    
8053
      # Build the rename list based on what LVs exist on the node
8054
      rename_old_to_new = []
8055
      for to_ren in old_lvs:
8056
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8057
        if not result.fail_msg and result.payload:
8058
          # device exists
8059
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8060

    
8061
      self.lu.LogInfo("Renaming the old LVs on the target node")
8062
      result = self.rpc.call_blockdev_rename(self.target_node,
8063
                                             rename_old_to_new)
8064
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8065

    
8066
      # Now we rename the new LVs to the old LVs
8067
      self.lu.LogInfo("Renaming the new LVs on the target node")
8068
      rename_new_to_old = [(new, old.physical_id)
8069
                           for old, new in zip(old_lvs, new_lvs)]
8070
      result = self.rpc.call_blockdev_rename(self.target_node,
8071
                                             rename_new_to_old)
8072
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8073

    
8074
      for old, new in zip(old_lvs, new_lvs):
8075
        new.logical_id = old.logical_id
8076
        self.cfg.SetDiskID(new, self.target_node)
8077

    
8078
      for disk in old_lvs:
8079
        disk.logical_id = ren_fn(disk, temp_suffix)
8080
        self.cfg.SetDiskID(disk, self.target_node)
8081

    
8082
      # Now that the new lvs have the old name, we can add them to the device
8083
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8084
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8085
                                                  new_lvs)
8086
      msg = result.fail_msg
8087
      if msg:
8088
        for new_lv in new_lvs:
8089
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8090
                                               new_lv).fail_msg
8091
          if msg2:
8092
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8093
                               hint=("cleanup manually the unused logical"
8094
                                     "volumes"))
8095
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8096

    
8097
      dev.children = new_lvs
8098

    
8099
      self.cfg.Update(self.instance, feedback_fn)
8100

    
8101
    cstep = 5
8102
    if self.early_release:
8103
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8104
      cstep += 1
8105
      self._RemoveOldStorage(self.target_node, iv_names)
8106
      # WARNING: we release both node locks here, do not do other RPCs
8107
      # than WaitForSync to the primary node
8108
      self._ReleaseNodeLock([self.target_node, self.other_node])
8109

    
8110
    # Wait for sync
8111
    # This can fail as the old devices are degraded and _WaitForSync
8112
    # does a combined result over all disks, so we don't check its return value
8113
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8114
    cstep += 1
8115
    _WaitForSync(self.lu, self.instance)
8116

    
8117
    # Check all devices manually
8118
    self._CheckDevices(self.instance.primary_node, iv_names)
8119

    
8120
    # Step: remove old storage
8121
    if not self.early_release:
8122
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8123
      cstep += 1
8124
      self._RemoveOldStorage(self.target_node, iv_names)
8125

    
8126
  def _ExecDrbd8Secondary(self, feedback_fn):
8127
    """Replace the secondary node for DRBD 8.
8128

8129
    The algorithm for replace is quite complicated:
8130
      - for all disks of the instance:
8131
        - create new LVs on the new node with same names
8132
        - shutdown the drbd device on the old secondary
8133
        - disconnect the drbd network on the primary
8134
        - create the drbd device on the new secondary
8135
        - network attach the drbd on the primary, using an artifice:
8136
          the drbd code for Attach() will connect to the network if it
8137
          finds a device which is connected to the good local disks but
8138
          not network enabled
8139
      - wait for sync across all devices
8140
      - remove all disks from the old secondary
8141

8142
    Failures are not very well handled.
8143

8144
    """
8145
    steps_total = 6
8146

    
8147
    # Step: check device activation
8148
    self.lu.LogStep(1, steps_total, "Check device existence")
8149
    self._CheckDisksExistence([self.instance.primary_node])
8150
    self._CheckVolumeGroup([self.instance.primary_node])
8151

    
8152
    # Step: check other node consistency
8153
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8154
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8155

    
8156
    # Step: create new storage
8157
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8158
    for idx, dev in enumerate(self.instance.disks):
8159
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8160
                      (self.new_node, idx))
8161
      # we pass force_create=True to force LVM creation
8162
      for new_lv in dev.children:
8163
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8164
                        _GetInstanceInfoText(self.instance), False)
8165

    
8166
    # Step 4: drbd minors and drbd setup changes
8167
    # after this, we must manually remove the drbd minors on both the
8168
    # error and the success paths
8169
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
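    # reserve one new DRBD minor on the new node for every disk of the
    # instance (hence the list comprehension over self.instance.disks)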
8170
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8171
                                         for dev in self.instance.disks],
8172
                                        self.instance.name)
8173
    logging.debug("Allocated minors %r", minors)
8174

    
8175
    iv_names = {}
8176
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8177
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8178
                      (self.new_node, idx))
8179
      # create new devices on new_node; note that we create two IDs:
8180
      # one without port, so the drbd will be activated without
8181
      # networking information on the new node at this stage, and one
8182
      # with network, for the latter activation in step 4
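      # (the logical_id of a DRBD8 disk is the 6-tuple
      # (node_a, node_b, port, minor_a, minor_b, secret), unpacked below)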
8183
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8184
      if self.instance.primary_node == o_node1:
8185
        p_minor = o_minor1
8186
      else:
8187
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8188
        p_minor = o_minor2
8189

    
8190
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8191
                      p_minor, new_minor, o_secret)
8192
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8193
                    p_minor, new_minor, o_secret)
8194

    
8195
      iv_names[idx] = (dev, dev.children, new_net_id)
8196
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8197
                    new_net_id)
8198
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8199
                              logical_id=new_alone_id,
8200
                              children=dev.children,
8201
                              size=dev.size)
8202
      try:
8203
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8204
                              _GetInstanceInfoText(self.instance), False)
8205
      except errors.GenericError:
8206
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8207
        raise
8208

    
8209
    # We have new devices, shutdown the drbd on the old secondary
8210
    for idx, dev in enumerate(self.instance.disks):
8211
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8212
      self.cfg.SetDiskID(dev, self.target_node)
8213
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8214
      if msg:
8215
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8216
                           "node: %s" % (idx, msg),
8217
                           hint=("Please cleanup this device manually as"
8218
                                 " soon as possible"))
8219

    
8220
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8221
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8222
                                               self.node_secondary_ip,
8223
                                               self.instance.disks)\
8224
                                              [self.instance.primary_node]
8225

    
8226
    msg = result.fail_msg
8227
    if msg:
8228
      # detaches didn't succeed (unlikely)
8229
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8230
      raise errors.OpExecError("Can't detach the disks from the network on"
8231
                               " old node: %s" % (msg,))
8232

    
8233
    # if we managed to detach at least one, we update all the disks of
8234
    # the instance to point to the new secondary
8235
    self.lu.LogInfo("Updating instance configuration")
8236
    for dev, _, new_logical_id in iv_names.itervalues():
8237
      dev.logical_id = new_logical_id
8238
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8239

    
8240
    self.cfg.Update(self.instance, feedback_fn)
8241

    
8242
    # and now perform the drbd attach
8243
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8244
                    " (standalone => connected)")
8245
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8246
                                            self.new_node],
8247
                                           self.node_secondary_ip,
8248
                                           self.instance.disks,
8249
                                           self.instance.name,
8250
                                           False)
8251
    for to_node, to_result in result.items():
8252
      msg = to_result.fail_msg
8253
      if msg:
8254
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8255
                           to_node, msg,
8256
                           hint=("please do a gnt-instance info to see the"
8257
                                 " status of disks"))
8258
    cstep = 5
8259
    if self.early_release:
8260
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8261
      cstep += 1
8262
      self._RemoveOldStorage(self.target_node, iv_names)
8263
      # WARNING: we release all node locks here, do not do other RPCs
8264
      # than WaitForSync to the primary node
8265
      self._ReleaseNodeLock([self.instance.primary_node,
8266
                             self.target_node,
8267
                             self.new_node])
8268

    
8269
    # Wait for sync
8270
    # This can fail as the old devices are degraded and _WaitForSync
8271
    # does a combined result over all disks, so we don't check its return value
8272
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8273
    cstep += 1
8274
    _WaitForSync(self.lu, self.instance)
8275

    
8276
    # Check all devices manually
8277
    self._CheckDevices(self.instance.primary_node, iv_names)
8278

    
8279
    # Step: remove old storage
8280
    if not self.early_release:
8281
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8282
      self._RemoveOldStorage(self.target_node, iv_names)
8283

    
8284

    
8285
class LURepairNodeStorage(NoHooksLU):
8286
  """Repairs the volume group on a node.
8287

8288
  """
8289
  _OP_PARAMS = [
8290
    _PNodeName,
8291
    ("storage_type", _NoDefault, _CheckStorageType),
8292
    ("name", _NoDefault, _TNonEmptyString),
8293
    ("ignore_consistency", False, _TBool),
8294
    ]
8295
  REQ_BGL = False
8296

    
8297
  def CheckArguments(self):
8298
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8299

    
8300
    storage_type = self.op.storage_type
8301

    
8302
    if (constants.SO_FIX_CONSISTENCY not in
8303
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8304
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8305
                                 " repaired" % storage_type,
8306
                                 errors.ECODE_INVAL)
8307

    
8308
  def ExpandNames(self):
8309
    self.needed_locks = {
8310
      locking.LEVEL_NODE: [self.op.node_name],
8311
      }
8312

    
8313
  def _CheckFaultyDisks(self, instance, node_name):
8314
    """Ensure faulty disks abort the opcode or at least warn."""
8315
    try:
8316
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8317
                                  node_name, True):
8318
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8319
                                   " node '%s'" % (instance.name, node_name),
8320
                                   errors.ECODE_STATE)
8321
    except errors.OpPrereqError, err:
8322
      if self.op.ignore_consistency:
8323
        self.proc.LogWarning(str(err.args[0]))
8324
      else:
8325
        raise
8326

    
8327
  def CheckPrereq(self):
8328
    """Check prerequisites.
8329

8330
    """
8331
    # Check whether any instance on this node has faulty disks
8332
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8333
      if not inst.admin_up:
8334
        continue
8335
      check_nodes = set(inst.all_nodes)
8336
      check_nodes.discard(self.op.node_name)
8337
      for inst_node_name in check_nodes:
8338
        self._CheckFaultyDisks(inst, inst_node_name)
8339

    
8340
  def Exec(self, feedback_fn):
8341
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8342
                (self.op.name, self.op.node_name))
8343

    
8344
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8345
    result = self.rpc.call_storage_execute(self.op.node_name,
8346
                                           self.op.storage_type, st_args,
8347
                                           self.op.name,
8348
                                           constants.SO_FIX_CONSISTENCY)
8349
    result.Raise("Failed to repair storage unit '%s' on %s" %
8350
                 (self.op.name, self.op.node_name))
8351

    
8352

    
8353
class LUNodeEvacuationStrategy(NoHooksLU):
8354
  """Computes the node evacuation strategy.
8355

8356
  """
8357
  _OP_PARAMS = [
8358
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8359
    ("remote_node", None, _TMaybeString),
8360
    ("iallocator", None, _TMaybeString),
8361
    ]
8362
  REQ_BGL = False
8363

    
8364
  def CheckArguments(self):
8365
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8366

    
8367
  def ExpandNames(self):
8368
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8369
    self.needed_locks = locks = {}
8370
    if self.op.remote_node is None:
8371
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8372
    else:
8373
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8374
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8375

    
8376
  def Exec(self, feedback_fn):
8377
    if self.op.remote_node is not None:
8378
      instances = []
8379
      for node in self.op.nodes:
8380
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8381
      result = []
8382
      for i in instances:
8383
        if i.primary_node == self.op.remote_node:
8384
          raise errors.OpPrereqError("Node %s is the primary node of"
8385
                                     " instance %s, cannot use it as"
8386
                                     " secondary" %
8387
                                     (self.op.remote_node, i.name),
8388
                                     errors.ECODE_INVAL)
8389
        result.append([i.name, self.op.remote_node])
8390
    else:
8391
      ial = IAllocator(self.cfg, self.rpc,
8392
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8393
                       evac_nodes=self.op.nodes)
8394
      ial.Run(self.op.iallocator, validate=True)
8395
      if not ial.success:
8396
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8397
                                 errors.ECODE_NORES)
8398
      result = ial.result
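      # For IALLOCATOR_MODE_MEVAC the allocator reports, per evacuated
      # instance, the instance name and its new node(s).  The exact layout is
      # defined by the iallocator protocol; an illustrative result could look
      # like [["instance1.example.com", ["node3.example.com"]]].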
8399
    return result
8400

    
8401

    
8402
class LUGrowDisk(LogicalUnit):
8403
  """Grow a disk of an instance.
8404

8405
  """
8406
  HPATH = "disk-grow"
8407
  HTYPE = constants.HTYPE_INSTANCE
8408
  _OP_PARAMS = [
8409
    _PInstanceName,
8410
    ("disk", _NoDefault, _TInt),
8411
    ("amount", _NoDefault, _TInt),
8412
    ("wait_for_sync", True, _TBool),
8413
    ]
8414
  REQ_BGL = False
8415

    
8416
  def ExpandNames(self):
8417
    self._ExpandAndLockInstance()
8418
    self.needed_locks[locking.LEVEL_NODE] = []
8419
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8420

    
8421
  def DeclareLocks(self, level):
8422
    if level == locking.LEVEL_NODE:
8423
      self._LockInstancesNodes()
8424

    
8425
  def BuildHooksEnv(self):
8426
    """Build hooks env.
8427

8428
    This runs on the master, the primary and all the secondaries.
8429

8430
    """
8431
    env = {
8432
      "DISK": self.op.disk,
8433
      "AMOUNT": self.op.amount,
8434
      }
8435
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8436
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8437
    return env, nl, nl
8438

    
8439
  def CheckPrereq(self):
8440
    """Check prerequisites.
8441

8442
    This checks that the instance is in the cluster.
8443

8444
    """
8445
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8446
    assert instance is not None, \
8447
      "Cannot retrieve locked instance %s" % self.op.instance_name
8448
    nodenames = list(instance.all_nodes)
8449
    for node in nodenames:
8450
      _CheckNodeOnline(self, node)
8451

    
8452
    self.instance = instance
8453

    
8454
    if instance.disk_template not in constants.DTS_GROWABLE:
8455
      raise errors.OpPrereqError("Instance's disk layout does not support"
8456
                                 " growing.", errors.ECODE_INVAL)
8457

    
8458
    self.disk = instance.FindDisk(self.op.disk)
8459

    
8460
    if instance.disk_template != constants.DT_FILE:
8461
      # TODO: check the free disk space for file, when that feature will be
8462
      # supported
8463
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8464

    
8465
  def Exec(self, feedback_fn):
8466
    """Execute disk grow.
8467

8468
    """
8469
    instance = self.instance
8470
    disk = self.disk
8471

    
8472
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8473
    if not disks_ok:
8474
      raise errors.OpExecError("Cannot activate block device to grow")
8475

    
8476
    for node in instance.all_nodes:
8477
      self.cfg.SetDiskID(disk, node)
8478
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8479
      result.Raise("Grow request failed to node %s" % node)
8480

    
8481
      # TODO: Rewrite code to work properly
8482
      # DRBD goes into sync mode for a short amount of time after executing the
8483
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8484
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8485
      # time is a work-around.
8486
      time.sleep(5)
8487

    
8488
    disk.RecordGrow(self.op.amount)
8489
    self.cfg.Update(instance, feedback_fn)
8490
    if self.op.wait_for_sync:
8491
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8492
      if disk_abort:
8493
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8494
                             " status.\nPlease check the instance.")
8495
      if not instance.admin_up:
8496
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8497
    elif not instance.admin_up:
8498
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8499
                           " not supposed to be running because no wait for"
8500
                           " sync mode was requested.")
8501

    
8502

    
8503
class LUQueryInstanceData(NoHooksLU):
8504
  """Query runtime instance data.
8505

8506
  """
8507
  _OP_PARAMS = [
8508
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8509
    ("static", False, _TBool),
8510
    ]
8511
  REQ_BGL = False
8512

    
8513
  def ExpandNames(self):
8514
    self.needed_locks = {}
8515
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8516

    
8517
    if self.op.instances:
8518
      self.wanted_names = []
8519
      for name in self.op.instances:
8520
        full_name = _ExpandInstanceName(self.cfg, name)
8521
        self.wanted_names.append(full_name)
8522
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8523
    else:
8524
      self.wanted_names = None
8525
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8526

    
8527
    self.needed_locks[locking.LEVEL_NODE] = []
8528
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
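    # The actual node locks are filled in by DeclareLocks via
    # _LockInstancesNodes once the instance locks have been acquired.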
8529

    
8530
  def DeclareLocks(self, level):
8531
    if level == locking.LEVEL_NODE:
8532
      self._LockInstancesNodes()
8533

    
8534
  def CheckPrereq(self):
8535
    """Check prerequisites.
8536

8537
    This only checks the optional instance list against the existing names.
8538

8539
    """
8540
    if self.wanted_names is None:
8541
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8542

    
8543
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8544
                             in self.wanted_names]
8545

    
8546
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8547
    """Returns the status of a block device
8548

8549
    """
8550
    if self.op.static or not node:
8551
      return None
8552

    
8553
    self.cfg.SetDiskID(dev, node)
8554

    
8555
    result = self.rpc.call_blockdev_find(node, dev)
8556
    if result.offline:
8557
      return None
8558

    
8559
    result.Raise("Can't compute disk status for %s" % instance_name)
8560

    
8561
    status = result.payload
8562
    if status is None:
8563
      return None
8564

    
8565
    return (status.dev_path, status.major, status.minor,
8566
            status.sync_percent, status.estimated_time,
8567
            status.is_degraded, status.ldisk_status)
8568

    
8569
  def _ComputeDiskStatus(self, instance, snode, dev):
8570
    """Compute block device status.
8571

8572
    """
8573
    if dev.dev_type in constants.LDS_DRBD:
8574
      # we change the snode then (otherwise we use the one passed in)
8575
      if dev.logical_id[0] == instance.primary_node:
8576
        snode = dev.logical_id[1]
8577
      else:
8578
        snode = dev.logical_id[0]
8579

    
8580
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8581
                                              instance.name, dev)
8582
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8583

    
8584
    if dev.children:
8585
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8586
                      for child in dev.children]
8587
    else:
8588
      dev_children = []
8589

    
8590
    data = {
8591
      "iv_name": dev.iv_name,
8592
      "dev_type": dev.dev_type,
8593
      "logical_id": dev.logical_id,
8594
      "physical_id": dev.physical_id,
8595
      "pstatus": dev_pstatus,
8596
      "sstatus": dev_sstatus,
8597
      "children": dev_children,
8598
      "mode": dev.mode,
8599
      "size": dev.size,
8600
      }
8601

    
8602
    return data
8603

    
8604
  def Exec(self, feedback_fn):
8605
    """Gather and return data"""
8606
    result = {}
8607

    
8608
    cluster = self.cfg.GetClusterInfo()
8609

    
8610
    for instance in self.wanted_instances:
8611
      if not self.op.static:
8612
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8613
                                                  instance.name,
8614
                                                  instance.hypervisor)
8615
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8616
        remote_info = remote_info.payload
8617
        if remote_info and "state" in remote_info:
8618
          remote_state = "up"
8619
        else:
8620
          remote_state = "down"
8621
      else:
8622
        remote_state = None
8623
      if instance.admin_up:
8624
        config_state = "up"
8625
      else:
8626
        config_state = "down"
8627

    
8628
      disks = [self._ComputeDiskStatus(instance, None, device)
8629
               for device in instance.disks]
8630

    
8631
      idict = {
8632
        "name": instance.name,
8633
        "config_state": config_state,
8634
        "run_state": remote_state,
8635
        "pnode": instance.primary_node,
8636
        "snodes": instance.secondary_nodes,
8637
        "os": instance.os,
8638
        # this happens to be the same format used for hooks
8639
        "nics": _NICListToTuple(self, instance.nics),
8640
        "disk_template": instance.disk_template,
8641
        "disks": disks,
8642
        "hypervisor": instance.hypervisor,
8643
        "network_port": instance.network_port,
8644
        "hv_instance": instance.hvparams,
8645
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8646
        "be_instance": instance.beparams,
8647
        "be_actual": cluster.FillBE(instance),
8648
        "os_instance": instance.osparams,
8649
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8650
        "serial_no": instance.serial_no,
8651
        "mtime": instance.mtime,
8652
        "ctime": instance.ctime,
8653
        "uuid": instance.uuid,
8654
        }
8655

    
8656
      result[instance.name] = idict
8657

    
8658
    return result
8659

    
8660

    
8661
class LUSetInstanceParams(LogicalUnit):
8662
  """Modifies an instances's parameters.
8663

8664
  """
8665
  HPATH = "instance-modify"
8666
  HTYPE = constants.HTYPE_INSTANCE
8667
  _OP_PARAMS = [
8668
    _PInstanceName,
8669
    ("nics", _EmptyList, _TList),
8670
    ("disks", _EmptyList, _TList),
8671
    ("beparams", _EmptyDict, _TDict),
8672
    ("hvparams", _EmptyDict, _TDict),
8673
    ("disk_template", None, _TMaybeString),
8674
    ("remote_node", None, _TMaybeString),
8675
    ("os_name", None, _TMaybeString),
8676
    ("force_variant", False, _TBool),
8677
    ("osparams", None, _TOr(_TDict, _TNone)),
8678
    _PForce,
8679
    ]
8680
  REQ_BGL = False
8681

    
8682
  def CheckArguments(self):
8683
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8684
            self.op.hvparams or self.op.beparams or self.op.os_name):
8685
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8686

    
8687
    if self.op.hvparams:
8688
      _CheckGlobalHvParams(self.op.hvparams)
8689

    
8690
    # Disk validation
8691
    disk_addremove = 0
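    # Each entry in self.op.disks is a (disk_op, disk_dict) pair: disk_op is
    # constants.DDM_ADD, constants.DDM_REMOVE or the index of an existing
    # disk to modify, while disk_dict carries the parameters for that change.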
8692
    for disk_op, disk_dict in self.op.disks:
8693
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8694
      if disk_op == constants.DDM_REMOVE:
8695
        disk_addremove += 1
8696
        continue
8697
      elif disk_op == constants.DDM_ADD:
8698
        disk_addremove += 1
8699
      else:
8700
        if not isinstance(disk_op, int):
8701
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8702
        if not isinstance(disk_dict, dict):
8703
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8704
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8705

    
8706
      if disk_op == constants.DDM_ADD:
8707
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8708
        if mode not in constants.DISK_ACCESS_SET:
8709
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8710
                                     errors.ECODE_INVAL)
8711
        size = disk_dict.get('size', None)
8712
        if size is None:
8713
          raise errors.OpPrereqError("Required disk parameter size missing",
8714
                                     errors.ECODE_INVAL)
8715
        try:
8716
          size = int(size)
8717
        except (TypeError, ValueError), err:
8718
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8719
                                     str(err), errors.ECODE_INVAL)
8720
        disk_dict['size'] = size
8721
      else:
8722
        # modification of disk
8723
        if 'size' in disk_dict:
8724
          raise errors.OpPrereqError("Disk size change not possible, use"
8725
                                     " grow-disk", errors.ECODE_INVAL)
8726

    
8727
    if disk_addremove > 1:
8728
      raise errors.OpPrereqError("Only one disk add or remove operation"
8729
                                 " supported at a time", errors.ECODE_INVAL)
8730

    
8731
    if self.op.disks and self.op.disk_template is not None:
8732
      raise errors.OpPrereqError("Disk template conversion and other disk"
8733
                                 " changes not supported at the same time",
8734
                                 errors.ECODE_INVAL)
8735

    
8736
    if self.op.disk_template:
8737
      _CheckDiskTemplate(self.op.disk_template)
8738
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8739
          self.op.remote_node is None):
8740
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8741
                                   " one requires specifying a secondary node",
8742
                                   errors.ECODE_INVAL)
8743

    
8744
    # NIC validation
8745
    nic_addremove = 0
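    # NIC changes follow the same (nic_op, nic_dict) convention as the disk
    # changes above.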
8746
    for nic_op, nic_dict in self.op.nics:
8747
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8748
      if nic_op == constants.DDM_REMOVE:
8749
        nic_addremove += 1
8750
        continue
8751
      elif nic_op == constants.DDM_ADD:
8752
        nic_addremove += 1
8753
      else:
8754
        if not isinstance(nic_op, int):
8755
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8756
        if not isinstance(nic_dict, dict):
8757
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8758
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8759

    
8760
      # nic_dict should be a dict
8761
      nic_ip = nic_dict.get('ip', None)
8762
      if nic_ip is not None:
8763
        if nic_ip.lower() == constants.VALUE_NONE:
8764
          nic_dict['ip'] = None
8765
        else:
8766
          if not netutils.IsValidIP4(nic_ip):
8767
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8768
                                       errors.ECODE_INVAL)
8769

    
8770
      nic_bridge = nic_dict.get('bridge', None)
8771
      nic_link = nic_dict.get('link', None)
8772
      if nic_bridge and nic_link:
8773
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8774
                                   " at the same time", errors.ECODE_INVAL)
8775
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8776
        nic_dict['bridge'] = None
8777
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8778
        nic_dict['link'] = None
8779

    
8780
      if nic_op == constants.DDM_ADD:
8781
        nic_mac = nic_dict.get('mac', None)
8782
        if nic_mac is None:
8783
          nic_dict['mac'] = constants.VALUE_AUTO
8784

    
8785
      if 'mac' in nic_dict:
8786
        nic_mac = nic_dict['mac']
8787
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8788
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8789

    
8790
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8791
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8792
                                     " modifying an existing nic",
8793
                                     errors.ECODE_INVAL)
8794

    
8795
    if nic_addremove > 1:
8796
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8797
                                 " supported at a time", errors.ECODE_INVAL)
8798

    
8799
  def ExpandNames(self):
8800
    self._ExpandAndLockInstance()
8801
    self.needed_locks[locking.LEVEL_NODE] = []
8802
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8803

    
8804
  def DeclareLocks(self, level):
8805
    if level == locking.LEVEL_NODE:
8806
      self._LockInstancesNodes()
8807
      if self.op.disk_template and self.op.remote_node:
8808
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8809
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8810

    
8811
  def BuildHooksEnv(self):
8812
    """Build hooks env.
8813

8814
    This runs on the master, primary and secondaries.
8815

8816
    """
8817
    args = dict()
8818
    if constants.BE_MEMORY in self.be_new:
8819
      args['memory'] = self.be_new[constants.BE_MEMORY]
8820
    if constants.BE_VCPUS in self.be_new:
8821
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8822
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8823
    # information at all.
8824
    if self.op.nics:
8825
      args['nics'] = []
8826
      nic_override = dict(self.op.nics)
8827
      for idx, nic in enumerate(self.instance.nics):
8828
        if idx in nic_override:
8829
          this_nic_override = nic_override[idx]
8830
        else:
8831
          this_nic_override = {}
8832
        if 'ip' in this_nic_override:
8833
          ip = this_nic_override['ip']
8834
        else:
8835
          ip = nic.ip
8836
        if 'mac' in this_nic_override:
8837
          mac = this_nic_override['mac']
8838
        else:
8839
          mac = nic.mac
8840
        if idx in self.nic_pnew:
8841
          nicparams = self.nic_pnew[idx]
8842
        else:
8843
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8844
        mode = nicparams[constants.NIC_MODE]
8845
        link = nicparams[constants.NIC_LINK]
8846
        args['nics'].append((ip, mac, mode, link))
8847
      if constants.DDM_ADD in nic_override:
8848
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8849
        mac = nic_override[constants.DDM_ADD]['mac']
8850
        nicparams = self.nic_pnew[constants.DDM_ADD]
8851
        mode = nicparams[constants.NIC_MODE]
8852
        link = nicparams[constants.NIC_LINK]
8853
        args['nics'].append((ip, mac, mode, link))
8854
      elif constants.DDM_REMOVE in nic_override:
8855
        del args['nics'][-1]
8856

    
8857
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8858
    if self.op.disk_template:
8859
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8860
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8861
    return env, nl, nl
8862

    
8863
  def CheckPrereq(self):
8864
    """Check prerequisites.
8865

8866
    This only checks the instance list against the existing names.
8867

8868
    """
8869
    # checking the new params on the primary/secondary nodes
8870

    
8871
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8872
    cluster = self.cluster = self.cfg.GetClusterInfo()
8873
    assert self.instance is not None, \
8874
      "Cannot retrieve locked instance %s" % self.op.instance_name
8875
    pnode = instance.primary_node
8876
    nodelist = list(instance.all_nodes)
8877

    
8878
    # OS change
8879
    if self.op.os_name and not self.op.force:
8880
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8881
                      self.op.force_variant)
8882
      instance_os = self.op.os_name
8883
    else:
8884
      instance_os = instance.os
8885

    
8886
    if self.op.disk_template:
8887
      if instance.disk_template == self.op.disk_template:
8888
        raise errors.OpPrereqError("Instance already has disk template %s" %
8889
                                   instance.disk_template, errors.ECODE_INVAL)
8890

    
8891
      if (instance.disk_template,
8892
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8893
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8894
                                   " %s to %s" % (instance.disk_template,
8895
                                                  self.op.disk_template),
8896
                                   errors.ECODE_INVAL)
8897
      _CheckInstanceDown(self, instance, "cannot change disk template")
8898
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8899
        if self.op.remote_node == pnode:
8900
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8901
                                     " as the primary node of the instance" %
8902
                                     self.op.remote_node, errors.ECODE_STATE)
8903
        _CheckNodeOnline(self, self.op.remote_node)
8904
        _CheckNodeNotDrained(self, self.op.remote_node)
8905
        disks = [{"size": d.size} for d in instance.disks]
8906
        required = _ComputeDiskSize(self.op.disk_template, disks)
8907
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8908

    
8909
    # hvparams processing
8910
    if self.op.hvparams:
8911
      hv_type = instance.hypervisor
8912
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8913
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8914
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8915

    
8916
      # local check
8917
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8918
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8919
      self.hv_new = hv_new # the new actual values
8920
      self.hv_inst = i_hvdict # the new dict (without defaults)
8921
    else:
8922
      self.hv_new = self.hv_inst = {}
8923

    
8924
    # beparams processing
8925
    if self.op.beparams:
8926
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8927
                                   use_none=True)
8928
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8929
      be_new = cluster.SimpleFillBE(i_bedict)
8930
      self.be_new = be_new # the new actual values
8931
      self.be_inst = i_bedict # the new dict (without defaults)
8932
    else:
8933
      self.be_new = self.be_inst = {}
8934

    
8935
    # osparams processing
8936
    if self.op.osparams:
8937
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8938
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8939
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8940
      self.os_inst = i_osdict # the new dict (without defaults)
8941
    else:
8942
      self.os_new = self.os_inst = {}
8943

    
8944
    self.warn = []
8945

    
8946
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8947
      mem_check_list = [pnode]
8948
      if be_new[constants.BE_AUTO_BALANCE]:
8949
        # either we changed auto_balance to yes or it was from before
8950
        mem_check_list.extend(instance.secondary_nodes)
8951
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8952
                                                  instance.hypervisor)
8953
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8954
                                         instance.hypervisor)
8955
      pninfo = nodeinfo[pnode]
8956
      msg = pninfo.fail_msg
8957
      if msg:
8958
        # Assume the primary node is unreachable and go ahead
8959
        self.warn.append("Can't get info from primary node %s: %s" %
8960
                         (pnode,  msg))
8961
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8962
        self.warn.append("Node data from primary node %s doesn't contain"
8963
                         " free memory information" % pnode)
8964
      elif instance_info.fail_msg:
8965
        self.warn.append("Can't get instance runtime information: %s" %
8966
                        instance_info.fail_msg)
8967
      else:
8968
        if instance_info.payload:
8969
          current_mem = int(instance_info.payload['memory'])
8970
        else:
8971
          # Assume instance not running
8972
          # (there is a slight race condition here, but it's not very probable,
8973
          # and we have no other way to check)
8974
          current_mem = 0
8975
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8976
                    pninfo.payload['memory_free'])
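        # Worked example: raising the memory from 512 to 2048 MB while the
        # primary node reports 1024 MB free gives
        # miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is refused.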
8977
        if miss_mem > 0:
8978
          raise errors.OpPrereqError("This change will prevent the instance"
8979
                                     " from starting, due to %d MB of memory"
8980
                                     " missing on its primary node" % miss_mem,
8981
                                     errors.ECODE_NORES)
8982

    
8983
      if be_new[constants.BE_AUTO_BALANCE]:
8984
        for node, nres in nodeinfo.items():
8985
          if node not in instance.secondary_nodes:
8986
            continue
8987
          msg = nres.fail_msg
8988
          if msg:
8989
            self.warn.append("Can't get info from secondary node %s: %s" %
8990
                             (node, msg))
8991
          elif not isinstance(nres.payload.get('memory_free', None), int):
8992
            self.warn.append("Secondary node %s didn't return free"
8993
                             " memory information" % node)
8994
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8995
            self.warn.append("Not enough memory to failover instance to"
8996
                             " secondary node %s" % node)
8997

    
8998
    # NIC processing
8999
    self.nic_pnew = {}
9000
    self.nic_pinst = {}
9001
    for nic_op, nic_dict in self.op.nics:
9002
      if nic_op == constants.DDM_REMOVE:
9003
        if not instance.nics:
9004
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9005
                                     errors.ECODE_INVAL)
9006
        continue
9007
      if nic_op != constants.DDM_ADD:
9008
        # an existing nic
9009
        if not instance.nics:
9010
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9011
                                     " no NICs" % nic_op,
9012
                                     errors.ECODE_INVAL)
9013
        if nic_op < 0 or nic_op >= len(instance.nics):
9014
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9015
                                     " are 0 to %d" %
9016
                                     (nic_op, len(instance.nics) - 1),
9017
                                     errors.ECODE_INVAL)
9018
        old_nic_params = instance.nics[nic_op].nicparams
9019
        old_nic_ip = instance.nics[nic_op].ip
9020
      else:
9021
        old_nic_params = {}
9022
        old_nic_ip = None
9023

    
9024
      update_params_dict = dict([(key, nic_dict[key])
9025
                                 for key in constants.NICS_PARAMETERS
9026
                                 if key in nic_dict])
9027

    
9028
      if 'bridge' in nic_dict:
9029
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9030

    
9031
      new_nic_params = _GetUpdatedParams(old_nic_params,
9032
                                         update_params_dict)
9033
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9034
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9035
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9036
      self.nic_pinst[nic_op] = new_nic_params
9037
      self.nic_pnew[nic_op] = new_filled_nic_params
9038
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9039

    
9040
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9041
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9042
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9043
        if msg:
9044
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9045
          if self.op.force:
9046
            self.warn.append(msg)
9047
          else:
9048
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9049
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9050
        if 'ip' in nic_dict:
9051
          nic_ip = nic_dict['ip']
9052
        else:
9053
          nic_ip = old_nic_ip
9054
        if nic_ip is None:
9055
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9056
                                     ' on a routed nic', errors.ECODE_INVAL)
9057
      if 'mac' in nic_dict:
9058
        nic_mac = nic_dict['mac']
9059
        if nic_mac is None:
9060
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9061
                                     errors.ECODE_INVAL)
9062
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9063
          # otherwise generate the mac
9064
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9065
        else:
9066
          # or validate/reserve the current one
9067
          try:
9068
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9069
          except errors.ReservationError:
9070
            raise errors.OpPrereqError("MAC address %s already in use"
9071
                                       " in cluster" % nic_mac,
9072
                                       errors.ECODE_NOTUNIQUE)
9073

    
9074
    # DISK processing
9075
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9076
      raise errors.OpPrereqError("Disk operations not supported for"
9077
                                 " diskless instances",
9078
                                 errors.ECODE_INVAL)
9079
    for disk_op, _ in self.op.disks:
9080
      if disk_op == constants.DDM_REMOVE:
9081
        if len(instance.disks) == 1:
9082
          raise errors.OpPrereqError("Cannot remove the last disk of"
9083
                                     " an instance", errors.ECODE_INVAL)
9084
        _CheckInstanceDown(self, instance, "cannot remove disks")
9085

    
9086
      if (disk_op == constants.DDM_ADD and
9087
          len(instance.disks) >= constants.MAX_DISKS):
9088
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9089
                                   " add more" % constants.MAX_DISKS,
9090
                                   errors.ECODE_STATE)
9091
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9092
        # an existing disk
9093
        if disk_op < 0 or disk_op >= len(instance.disks):
9094
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9095
                                     " are 0 to %d" %
9096
                                     (disk_op, len(instance.disks) - 1),
9097
                                     errors.ECODE_INVAL)
9098

    
9099
    return
9100

    
9101
  def _ConvertPlainToDrbd(self, feedback_fn):
9102
    """Converts an instance from plain to drbd.
9103

9104
    """
9105
    feedback_fn("Converting template to drbd")
9106
    instance = self.instance
9107
    pnode = instance.primary_node
9108
    snode = self.op.remote_node
9109

    
9110
    # create a fake disk info for _GenerateDiskTemplate
9111
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9112
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9113
                                      instance.name, pnode, [snode],
9114
                                      disk_info, None, None, 0)
9115
    info = _GetInstanceInfoText(instance)
9116
    feedback_fn("Creating aditional volumes...")
9117
    # first, create the missing data and meta devices
9118
    for disk in new_disks:
9119
      # unfortunately this is... not too nice
9120
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9121
                            info, True)
9122
      for child in disk.children:
9123
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9124
    # at this stage, all new LVs have been created, we can rename the
9125
    # old ones
9126
    feedback_fn("Renaming original volumes...")
9127
    rename_list = [(o, n.children[0].logical_id)
9128
                   for (o, n) in zip(instance.disks, new_disks)]
9129
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9130
    result.Raise("Failed to rename original LVs")
9131

    
9132
    feedback_fn("Initializing DRBD devices...")
9133
    # all child devices are in place, we can now create the DRBD devices
9134
    for disk in new_disks:
9135
      for node in [pnode, snode]:
9136
        f_create = node == pnode
9137
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9138

    
9139
    # at this point, the instance has been modified
9140
    instance.disk_template = constants.DT_DRBD8
9141
    instance.disks = new_disks
9142
    self.cfg.Update(instance, feedback_fn)
9143

    
9144
    # disks are created, waiting for sync
9145
    disk_abort = not _WaitForSync(self, instance)
9146
    if disk_abort:
9147
      raise errors.OpExecError("There are some degraded disks for"
9148
                               " this instance, please cleanup manually")
9149

    
9150
  def _ConvertDrbdToPlain(self, feedback_fn):
9151
    """Converts an instance from drbd to plain.
9152

9153
    """
9154
    instance = self.instance
9155
    assert len(instance.secondary_nodes) == 1
9156
    pnode = instance.primary_node
9157
    snode = instance.secondary_nodes[0]
9158
    feedback_fn("Converting template to plain")
9159

    
9160
    old_disks = instance.disks
9161
    new_disks = [d.children[0] for d in old_disks]
9162

    
9163
    # copy over size and mode
9164
    for parent, child in zip(old_disks, new_disks):
9165
      child.size = parent.size
9166
      child.mode = parent.mode
9167

    
9168
    # update instance structure
9169
    instance.disks = new_disks
9170
    instance.disk_template = constants.DT_PLAIN
9171
    self.cfg.Update(instance, feedback_fn)
9172

    
9173
    feedback_fn("Removing volumes on the secondary node...")
9174
    for disk in old_disks:
9175
      self.cfg.SetDiskID(disk, snode)
9176
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9177
      if msg:
9178
        self.LogWarning("Could not remove block device %s on node %s,"
9179
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9180

    
9181
    feedback_fn("Removing unneeded volumes on the primary node...")
9182
    for idx, disk in enumerate(old_disks):
9183
      meta = disk.children[1]
9184
      self.cfg.SetDiskID(meta, pnode)
9185
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9186
      if msg:
9187
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9188
                        " continuing anyway: %s", idx, pnode, msg)
9189

    
9190

    
9191
  def Exec(self, feedback_fn):
9192
    """Modifies an instance.
9193

9194
    All parameters take effect only at the next restart of the instance.
9195

9196
    """
9197
    # Process here the warnings from CheckPrereq, as we don't have a
9198
    # feedback_fn there.
9199
    for warn in self.warn:
9200
      feedback_fn("WARNING: %s" % warn)
9201

    
9202
    result = []
9203
    instance = self.instance
9204
    # disk changes
9205
    for disk_op, disk_dict in self.op.disks:
9206
      if disk_op == constants.DDM_REMOVE:
9207
        # remove the last disk
9208
        device = instance.disks.pop()
9209
        device_idx = len(instance.disks)
9210
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9211
          self.cfg.SetDiskID(disk, node)
9212
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9213
          if msg:
9214
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9215
                            " continuing anyway", device_idx, node, msg)
9216
        result.append(("disk/%d" % device_idx, "remove"))
9217
      elif disk_op == constants.DDM_ADD:
9218
        # add a new disk
9219
        if instance.disk_template == constants.DT_FILE:
9220
          file_driver, file_path = instance.disks[0].logical_id
9221
          file_path = os.path.dirname(file_path)
9222
        else:
9223
          file_driver = file_path = None
9224
        disk_idx_base = len(instance.disks)
9225
        new_disk = _GenerateDiskTemplate(self,
9226
                                         instance.disk_template,
9227
                                         instance.name, instance.primary_node,
9228
                                         instance.secondary_nodes,
9229
                                         [disk_dict],
9230
                                         file_path,
9231
                                         file_driver,
9232
                                         disk_idx_base)[0]
9233
        instance.disks.append(new_disk)
9234
        info = _GetInstanceInfoText(instance)
9235

    
9236
        logging.info("Creating volume %s for instance %s",
9237
                     new_disk.iv_name, instance.name)
9238
        # Note: this needs to be kept in sync with _CreateDisks
9239
        #HARDCODE
9240
        for node in instance.all_nodes:
9241
          f_create = node == instance.primary_node
9242
          try:
9243
            _CreateBlockDev(self, node, instance, new_disk,
9244
                            f_create, info, f_create)
9245
          except errors.OpExecError, err:
9246
            self.LogWarning("Failed to create volume %s (%s) on"
9247
                            " node %s: %s",
9248
                            new_disk.iv_name, new_disk, node, err)
9249
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9250
                       (new_disk.size, new_disk.mode)))
9251
      else:
9252
        # change a given disk
9253
        instance.disks[disk_op].mode = disk_dict['mode']
9254
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9255

    
9256
    if self.op.disk_template:
9257
      r_shut = _ShutdownInstanceDisks(self, instance)
9258
      if not r_shut:
9259
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9260
                                 " proceed with disk template conversion")
9261
      mode = (instance.disk_template, self.op.disk_template)
9262
      try:
9263
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9264
      except:
9265
        self.cfg.ReleaseDRBDMinors(instance.name)
9266
        raise
9267
      result.append(("disk_template", self.op.disk_template))
9268

    
9269
    # NIC changes
9270
    for nic_op, nic_dict in self.op.nics:
9271
      if nic_op == constants.DDM_REMOVE:
9272
        # remove the last nic
9273
        del instance.nics[-1]
9274
        result.append(("nic.%d" % len(instance.nics), "remove"))
9275
      elif nic_op == constants.DDM_ADD:
9276
        # mac and bridge should be set, by now
9277
        mac = nic_dict['mac']
9278
        ip = nic_dict.get('ip', None)
9279
        nicparams = self.nic_pinst[constants.DDM_ADD]
9280
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9281
        instance.nics.append(new_nic)
9282
        result.append(("nic.%d" % (len(instance.nics) - 1),
9283
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9284
                       (new_nic.mac, new_nic.ip,
9285
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9286
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9287
                       )))
9288
      else:
9289
        for key in 'mac', 'ip':
9290
          if key in nic_dict:
9291
            setattr(instance.nics[nic_op], key, nic_dict[key])
9292
        if nic_op in self.nic_pinst:
9293
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9294
        for key, val in nic_dict.iteritems():
9295
          result.append(("nic.%s/%d" % (key, nic_op), val))
9296

    
9297
    # hvparams changes
9298
    if self.op.hvparams:
9299
      instance.hvparams = self.hv_inst
9300
      for key, val in self.op.hvparams.iteritems():
9301
        result.append(("hv/%s" % key, val))
9302

    
9303
    # beparams changes
9304
    if self.op.beparams:
9305
      instance.beparams = self.be_inst
9306
      for key, val in self.op.beparams.iteritems():
9307
        result.append(("be/%s" % key, val))
9308

    
9309
    # OS change
9310
    if self.op.os_name:
9311
      instance.os = self.op.os_name
9312

    
9313
    # osparams changes
9314
    if self.op.osparams:
9315
      instance.osparams = self.os_inst
9316
      for key, val in self.op.osparams.iteritems():
9317
        result.append(("os/%s" % key, val))
9318

    
9319
    self.cfg.Update(instance, feedback_fn)
9320

    
9321
    return result
9322

    
9323
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
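  # Only plain <-> drbd8 conversions are supported; the mapping goes from the
  # (current template, requested template) pair to the conversion method.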
class LUQueryExports(NoHooksLU):
9330
  """Query the exports list
9331

9332
  """
9333
  _OP_PARAMS = [
9334
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9335
    ("use_locking", False, _TBool),
9336
    ]
9337
  REQ_BGL = False
9338

    
9339
  def ExpandNames(self):
9340
    self.needed_locks = {}
9341
    self.share_locks[locking.LEVEL_NODE] = 1
9342
    if not self.op.nodes:
9343
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9344
    else:
9345
      self.needed_locks[locking.LEVEL_NODE] = \
9346
        _GetWantedNodes(self, self.op.nodes)
9347

    
9348
  def Exec(self, feedback_fn):
9349
    """Compute the list of all the exported system images.
9350

9351
    @rtype: dict
9352
    @return: a dictionary with the structure node->(export-list)
9353
        where export-list is a list of the instances exported on
9354
        that node.
9355

9356
    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
9367

    
9368

    
9369
class LUPrepareExport(NoHooksLU):
9370
  """Prepares an instance for an export and returns useful information.
9371

9372
  """
9373
  _OP_PARAMS = [
9374
    _PInstanceName,
9375
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9376
    ]
9377
  REQ_BGL = False
9378

    
9379
  def ExpandNames(self):
9380
    self._ExpandAndLockInstance()
9381

    
9382
  def CheckPrereq(self):
9383
    """Check prerequisites.
9384

9385
    """
9386
    instance_name = self.op.instance_name
9387

    
9388
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9389
    assert self.instance is not None, \
9390
          "Cannot retrieve locked instance %s" % self.op.instance_name
9391
    _CheckNodeOnline(self, self.instance.primary_node)
9392

    
9393
    self._cds = _GetClusterDomainSecret()
9394

    
9395
  def Exec(self, feedback_fn):
9396
    """Prepares an instance for an export.
9397

9398
    """
9399
    instance = self.instance
9400

    
9401
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9402
      salt = utils.GenerateSecret(8)
9403

    
9404
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9405
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9406
                                              constants.RIE_CERT_VALIDITY)
9407
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9408

    
9409
      (name, cert_pem) = result.payload
9410

    
9411
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9412
                                             cert_pem)
9413

    
9414
      return {
9415
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9416
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9417
                          salt),
9418
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9419
        }
9420

    
9421
    return None
9422

    
9423

    
9424
class LUExportInstance(LogicalUnit):
9425
  """Export an instance to an image in the cluster.
9426

9427
  """
9428
  HPATH = "instance-export"
9429
  HTYPE = constants.HTYPE_INSTANCE
9430
  _OP_PARAMS = [
9431
    _PInstanceName,
9432
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9433
    ("shutdown", True, _TBool),
9434
    _PShutdownTimeout,
9435
    ("remove_instance", False, _TBool),
9436
    ("ignore_remove_failures", False, _TBool),
9437
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9438
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9439
    ("destination_x509_ca", None, _TMaybeString),
9440
    ]
9441
  REQ_BGL = False
9442

    
9443
  def CheckArguments(self):
9444
    """Check the arguments.
9445

9446
    """
9447
    self.x509_key_name = self.op.x509_key_name
9448
    self.dest_x509_ca_pem = self.op.destination_x509_ca
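    # For remote exports, x509_key_name is a (name, hmac, salt) triple and
    # destination_x509_ca a signed X509 CA in PEM form, matching the format
    # handed out by LUPrepareExport; both are verified against the cluster
    # domain secret in CheckPrereq.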
9449

    
9450
    if self.op.remove_instance and not self.op.shutdown:
9451
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9452
                                 " down before")
9453

    
9454
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9455
      if not self.x509_key_name:
9456
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9457
                                   errors.ECODE_INVAL)
9458

    
9459
      if not self.dest_x509_ca_pem:
9460
        raise errors.OpPrereqError("Missing destination X509 CA",
9461
                                   errors.ECODE_INVAL)
9462

    
9463
  def ExpandNames(self):
9464
    self._ExpandAndLockInstance()
9465

    
9466
    # Lock all nodes for local exports
9467
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9468
      # FIXME: lock only instance primary and destination node
9469
      #
9470
      # Sad but true, for now we have to lock all nodes, as we don't know where
9471
      # the previous export might be, and in this LU we search for it and
9472
      # remove it from its current node. In the future we could fix this by:
9473
      #  - making a tasklet to search (share-lock all), then create the
9474
      #    new one, then one to remove, after
9475
      #  - removing the removal operation altogether
9476
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9477

    
9478
  def DeclareLocks(self, level):
9479
    """Last minute lock declaration."""
9480
    # All nodes are locked anyway, so nothing to do here.
9481

    
9482
  def BuildHooksEnv(self):
9483
    """Build hooks env.
9484

9485
    This will run on the master, primary node and target node.
9486

9487
    """
9488
    env = {
9489
      "EXPORT_MODE": self.op.mode,
9490
      "EXPORT_NODE": self.op.target_node,
9491
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9492
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9493
      # TODO: Generic function for boolean env variables
9494
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9495
      }
9496

    
9497
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9498

    
9499
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9500

    
9501
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9502
      nl.append(self.op.target_node)
9503

    
9504
    return env, nl, nl
9505

    
9506
  def CheckPrereq(self):
9507
    """Check prerequisites.
9508

9509
    This checks that the instance and node names are valid.
9510

9511
    """
9512
    instance_name = self.op.instance_name
9513

    
9514
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9515
    assert self.instance is not None, \
9516
          "Cannot retrieve locked instance %s" % self.op.instance_name
9517
    _CheckNodeOnline(self, self.instance.primary_node)
9518

    
9519
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9520
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9521
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9522
      assert self.dst_node is not None
9523

    
9524
      _CheckNodeOnline(self, self.dst_node.name)
9525
      _CheckNodeNotDrained(self, self.dst_node.name)
9526

    
9527
      self._cds = None
9528
      self.dest_disk_info = None
9529
      self.dest_x509_ca = None
9530

    
9531
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9532
      self.dst_node = None
9533

    
9534
      if len(self.op.target_node) != len(self.instance.disks):
9535
        raise errors.OpPrereqError(("Received destination information for %s"
9536
                                    " disks, but instance %s has %s disks") %
9537
                                   (len(self.op.target_node), instance_name,
9538
                                    len(self.instance.disks)),
9539
                                   errors.ECODE_INVAL)
9540

    
9541
      cds = _GetClusterDomainSecret()
9542

    
9543
      # Check X509 key name
9544
      try:
9545
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9546
      except (TypeError, ValueError), err:
9547
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9548

    
9549
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9550
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9551
                                   errors.ECODE_INVAL)
9552

    
9553
      # Load and verify CA
9554
      try:
9555
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9556
      except OpenSSL.crypto.Error, err:
9557
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9558
                                   (err, ), errors.ECODE_INVAL)
9559

    
9560
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9561
      if errcode is not None:
9562
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9563
                                   (msg, ), errors.ECODE_INVAL)
9564

    
9565
      self.dest_x509_ca = cert
9566

    
9567
      # Verify target information
9568
      disk_info = []
9569
      for idx, disk_data in enumerate(self.op.target_node):
9570
        try:
9571
          (host, port, magic) = \
9572
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9573
        except errors.GenericError, err:
9574
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9575
                                     (idx, err), errors.ECODE_INVAL)
9576

    
9577
        disk_info.append((host, port, magic))
9578

    
9579
      assert len(disk_info) == len(self.op.target_node)
9580
      self.dest_disk_info = disk_info
9581

    
9582
    else:
9583
      raise errors.ProgrammerError("Unhandled export mode %r" %
9584
                                   self.op.mode)
9585

    
9586
    # instance disk type verification
9587
    # TODO: Implement export support for file-based disks
9588
    for disk in self.instance.disks:
9589
      if disk.dev_type == constants.LD_FILE:
9590
        raise errors.OpPrereqError("Export not supported for instances with"
9591
                                   " file-based disks", errors.ECODE_INVAL)
9592

    
9593
  def _CleanupExports(self, feedback_fn):
9594
    """Removes exports of current instance from all other nodes.
9595

9596
    If an instance in a cluster with nodes A..D was exported to node C, its
9597
    exports will be removed from the nodes A, B and D.
9598

9599
    """
9600
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9601

    
9602
    nodelist = self.cfg.GetNodeList()
9603
    nodelist.remove(self.dst_node.name)
9604

    
9605
    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
9608
    iname = self.instance.name
9609
    if nodelist:
9610
      feedback_fn("Removing old exports for instance %s" % iname)
9611
      exportlist = self.rpc.call_export_list(nodelist)
9612
      for node in exportlist:
9613
        if exportlist[node].fail_msg:
9614
          continue
9615
        if iname in exportlist[node].payload:
9616
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9617
          if msg:
9618
            self.LogWarning("Could not remove older export for instance %s"
9619
                            " on node %s: %s", iname, node, msg)
9620

    
9621
  def Exec(self, feedback_fn):
9622
    """Export an instance to an image in the cluster.
9623

9624
    """
9625
    assert self.op.mode in constants.EXPORT_MODES
9626

    
9627
    instance = self.instance
9628
    src_node = instance.primary_node
9629

    
9630
    if self.op.shutdown:
9631
      # shutdown the instance, but not the disks
9632
      feedback_fn("Shutting down instance %s" % instance.name)
9633
      result = self.rpc.call_instance_shutdown(src_node, instance,
9634
                                               self.op.shutdown_timeout)
9635
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9636
      result.Raise("Could not shutdown instance %s on"
9637
                   " node %s" % (instance.name, src_node))
9638

    
9639
    # set the disks ID correctly since call_instance_start needs the
9640
    # correct drbd minor to create the symlinks
9641
    for disk in instance.disks:
9642
      self.cfg.SetDiskID(disk, src_node)
9643

    
9644
    activate_disks = (not instance.admin_up)
9645

    
9646
    if activate_disks:
9647
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

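    # fin_resu is the overall finalization status; dresults holds one boolean
    # per instance disk (True meaning that disk was exported successfully)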
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
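
  # Typical usage, mirroring LUTestAllocator.Exec further below (a sketch
  # only; error handling by the caller is elided):
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=self.op.name,
  #                    relocate_from=list(self.relocate_from))
  #   ial.Run(self.op.allocator)
  #   # afterwards ial.success, ial.info and ial.result hold the verdict,
  #   # the message and the payload returned by the external script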

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
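            # memory configured for the instance but not currently reported
            # as used by the hypervisor is still treated as reserved, hence
            # it is subtracted from the node's free memory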
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10377
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
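    # in_data now holds the cluster-level keys built by _ComputeClusterData
    # ("version", "cluster_name", "cluster_tags", "enabled_hypervisors",
    # "nodes", "instances") plus the mode-specific "request" dict; in_text
    # is the serialized form handed to the external script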

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
10563
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result