#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs

def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


#: The without-default default value
_NoDefault = object()


#: The no-type (value too complex to check in the type system)
_NoType = object()


# Some basic types
def _TNotNone(val):
  """Checks if the given value is not None.

  """
  return val is not None


def _TNone(val):
  """Checks if the given value is None.

  """
  return val is None


def _TBool(val):
  """Checks if the given value is a boolean.

  """
  return isinstance(val, bool)


def _TInt(val):
  """Checks if the given value is an integer.

  """
  return isinstance(val, int)


def _TFloat(val):
  """Checks if the given value is a float.

  """
  return isinstance(val, float)


def _TString(val):
  """Checks if the given value is a string.

  """
  return isinstance(val, basestring)


def _TTrue(val):
  """Checks if a given value evaluates to a boolean True value.

  """
  return bool(val)


def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list


# Container types
def _TList(val):
  """Checks if the given value is a list.

  """
  return isinstance(val, list)


def _TDict(val):
  """Checks if the given value is a dictionary.

  """
  return isinstance(val, dict)


def _TIsLength(size):
  """Check if the given container is of the given size.

  """
  return lambda container: len(container) == size


# Combinator types
def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


def _TMap(fn, test):
  """Checks that a modified version of the argument passes the given test.

  """
  return lambda val: test(fn(val))


# Type aliases

#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a maybe non-empty string
_TMaybeString = _TOr(_TNonEmptyString, _TNone)


#: a maybe boolean (bool or none)
_TMaybeBool = _TOr(_TBool, _TNone)


#: a positive integer
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)

#: a strictly positive integer
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)


def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(my_type(v) for v in lst))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
                                and compat.all(val_type(v)
                                               for v in my_dict.values())))
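
# Illustrative note (not part of the original module): the combinators above
# compose into reusable parameter checks.  As a hypothetical sketch, a check
# for a non-empty list of non-empty strings could be built and used like this:
#
#   _TNodeNameList = _TAnd(_TListOf(_TNonEmptyString), _TTrue)
#
#   _TNodeNameList(["node1.example.com", "node2.example.com"])  # -> True
#   _TNodeNameList([])          # -> False, the empty list fails _TTrue
#   _TNodeNameList(["node1", 42])  # -> False, 42 fails _TNonEmptyString
#
# Each checker is just a callable taking a value and returning a boolean,
# which is what the _OP_PARAMS validation loop below expects.
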


# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)

#: the force parameter
_PForce = ("force", False, _TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)


#: a required node name (for single-node LUs)
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None, _TOr(_TNone,
                                      _TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, _TMaybeBool)


# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == _NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == _NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
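
  # Illustrative note (not part of the original module): a subclass declares
  # its opcode parameters as (name, default, check) triples, e.g. a
  # hypothetical LU acting on one instance could use:
  #
  #   _OP_PARAMS = [
  #     _PInstanceName,
  #     ("force", False, _TBool),
  #     ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
  #   ]
  #
  # In the loop above, missing attributes receive their default (callables
  # such as _EmptyList are invoked first), _NoDefault makes the parameter
  # mandatory, and the third element is the validation callable applied to
  # the final value.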

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    "No nodes" should be represented as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the 'unused argument' and
    # 'could be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
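
# Illustrative note (not part of the original module): a typical concurrent LU
# wires the helpers above together roughly like this (hypothetical sketch):
#
#   class LUDoSomethingOnInstance(LogicalUnit):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()
#
# i.e. the instance lock is taken in ExpandNames, and the node locks are
# computed in DeclareLocks once the instance lock is already held.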


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


    
696
def _GetUpdatedParams(old_params, update_dict,
697
                      use_default=True, use_none=False):
698
  """Return the new version of a parameter dictionary.
699

700
  @type old_params: dict
701
  @param old_params: old parameters
702
  @type update_dict: dict
703
  @param update_dict: dict containing new parameter values, or
704
      constants.VALUE_DEFAULT to reset the parameter to its default
705
      value
706
  @param use_default: boolean
707
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
708
      values as 'to be deleted' values
709
  @param use_none: boolean
710
  @type use_none: whether to recognise C{None} values as 'to be
711
      deleted' values
712
  @rtype: dict
713
  @return: the new parameter dictionary
714

715
  """
716
  params_copy = copy.deepcopy(old_params)
717
  for key, val in update_dict.iteritems():
718
    if ((use_default and val == constants.VALUE_DEFAULT) or
719
        (use_none and val is None)):
720
      try:
721
        del params_copy[key]
722
      except KeyError:
723
        pass
724
    else:
725
      params_copy[key] = val
726
  return params_copy
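
# Illustrative note (not part of the original module): as a hypothetical
# example, assuming constants.VALUE_DEFAULT is the "reset to default" marker,
# a call such as
#
#   _GetUpdatedParams({"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"},
#                     {"kernel_path": constants.VALUE_DEFAULT,
#                      "serial_console": True})
#
# would drop "kernel_path" (so the cluster-level default applies again) and
# add "serial_console", returning
#
#   {"root_path": "/dev/sda1", "serial_console": True}
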


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
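
# Illustrative note (not part of the original module): for a hypothetical
# instance with one NIC and one disk, the dict returned above contains,
# among others, keys such as:
#
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_OS_TYPE, INSTANCE_STATUS ("up"/"down"), INSTANCE_MEMORY,
#   INSTANCE_VCPUS, INSTANCE_DISK_TEMPLATE, INSTANCE_HYPERVISOR,
#   INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
#   INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and hypervisor
# parameter.  As noted in BuildHooksEnv, the hooks runner later adds the
# "GANETI_" prefix to each key.
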


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


1313
class LUVerifyCluster(LogicalUnit):
1314
  """Verifies the cluster status.
1315

1316
  """
1317
  HPATH = "cluster-verify"
1318
  HTYPE = constants.HTYPE_CLUSTER
1319
  _OP_PARAMS = [
1320
    ("skip_checks", _EmptyList,
1321
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1322
    ("verbose", False, _TBool),
1323
    ("error_codes", False, _TBool),
1324
    ("debug_simulate_errors", False, _TBool),
1325
    ]
1326
  REQ_BGL = False
1327

    
1328
  TCLUSTER = "cluster"
1329
  TNODE = "node"
1330
  TINSTANCE = "instance"
1331

    
1332
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1333
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1334
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1335
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1336
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1337
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1338
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1339
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1340
  ENODEDRBD = (TNODE, "ENODEDRBD")
1341
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1342
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1343
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1344
  ENODEHV = (TNODE, "ENODEHV")
1345
  ENODELVM = (TNODE, "ENODELVM")
1346
  ENODEN1 = (TNODE, "ENODEN1")
1347
  ENODENET = (TNODE, "ENODENET")
1348
  ENODEOS = (TNODE, "ENODEOS")
1349
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1350
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1351
  ENODERPC = (TNODE, "ENODERPC")
1352
  ENODESSH = (TNODE, "ENODESSH")
1353
  ENODEVERSION = (TNODE, "ENODEVERSION")
1354
  ENODESETUP = (TNODE, "ENODESETUP")
1355
  ENODETIME = (TNODE, "ENODETIME")
1356

    
1357
  ETYPE_FIELD = "code"
1358
  ETYPE_ERROR = "ERROR"
1359
  ETYPE_WARNING = "WARNING"
1360

    
1361
  class NodeImage(object):
1362
    """A class representing the logical and physical status of a node.
1363

1364
    @type name: string
1365
    @ivar name: the node name to which this object refers
1366
    @ivar volumes: a structure as returned from
1367
        L{ganeti.backend.GetVolumeList} (runtime)
1368
    @ivar instances: a list of running instances (runtime)
1369
    @ivar pinst: list of configured primary instances (config)
1370
    @ivar sinst: list of configured secondary instances (config)
1371
    @ivar sbp: diction of {secondary-node: list of instances} of all peers
1372
        of this node (config)
1373
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1374
    @ivar dfree: free disk, as reported by the node (runtime)
1375
    @ivar offline: the offline status (config)
1376
    @type rpc_fail: boolean
1377
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1378
        not whether the individual keys were correct) (runtime)
1379
    @type lvm_fail: boolean
1380
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1381
    @type hyp_fail: boolean
1382
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1383
    @type ghost: boolean
1384
    @ivar ghost: whether this is a known node or not (config)
1385
    @type os_fail: boolean
1386
    @ivar os_fail: whether the RPC call didn't return valid OS data
1387
    @type oslist: list
1388
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1389

1390
    """
1391
    def __init__(self, offline=False, name=None):
1392
      self.name = name
1393
      self.volumes = {}
1394
      self.instances = []
1395
      self.pinst = []
1396
      self.sinst = []
1397
      self.sbp = {}
1398
      self.mfree = 0
1399
      self.dfree = 0
1400
      self.offline = offline
1401
      self.rpc_fail = False
1402
      self.lvm_fail = False
1403
      self.hyp_fail = False
1404
      self.ghost = False
1405
      self.os_fail = False
1406
      self.oslist = {}
1407

    
1408
  def ExpandNames(self):
1409
    self.needed_locks = {
1410
      locking.LEVEL_NODE: locking.ALL_SET,
1411
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1412
    }
1413
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1414

    
1415
  def _Error(self, ecode, item, msg, *args, **kwargs):
1416
    """Format an error message.
1417

1418
    Based on the opcode's error_codes parameter, either format a
1419
    parseable error code, or a simpler error string.
1420

1421
    This must be called only from Exec and functions called from Exec.
1422

1423
    """
1424
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1425
    itype, etxt = ecode
1426
    # first complete the msg
1427
    if args:
1428
      msg = msg % args
1429
    # then format the whole message
1430
    if self.op.error_codes:
1431
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1432
    else:
1433
      if item:
1434
        item = " " + item
1435
      else:
1436
        item = ""
1437
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1438
    # and finally report it via the feedback_fn
1439
    self._feedback_fn("  - %s" % msg)
1440

    
1441
  def _ErrorIf(self, cond, *args, **kwargs):
1442
    """Log an error message if the passed condition is True.
1443

1444
    """
1445
    cond = bool(cond) or self.op.debug_simulate_errors
1446
    if cond:
1447
      self._Error(*args, **kwargs)
1448
    # do not mark the operation as failed for WARN cases only
1449
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1450
      self.bad = self.bad or cond
1451

    
1452
  def _VerifyNode(self, ninfo, nresult):
1453
    """Perform some basic validation on data returned from a node.
1454

1455
      - check the result data structure is well formed and has all the
1456
        mandatory fields
1457
      - check ganeti version
1458

1459
    @type ninfo: L{objects.Node}
1460
    @param ninfo: the node to check
1461
    @param nresult: the results from the node
1462
    @rtype: boolean
1463
    @return: whether overall this call was successful (and we can expect
1464
         reasonable values in the respose)
1465

1466
    """
1467
    node = ninfo.name
1468
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1469

    
1470
    # main result, nresult should be a non-empty dict
1471
    test = not nresult or not isinstance(nresult, dict)
1472
    _ErrorIf(test, self.ENODERPC, node,
1473
                  "unable to verify node: no data returned")
1474
    if test:
1475
      return False
1476

    
1477
    # compares ganeti version
1478
    local_version = constants.PROTOCOL_VERSION
1479
    remote_version = nresult.get("version", None)
1480
    test = not (remote_version and
1481
                isinstance(remote_version, (list, tuple)) and
1482
                len(remote_version) == 2)
1483
    _ErrorIf(test, self.ENODERPC, node,
1484
             "connection to node returned invalid data")
1485
    if test:
1486
      return False
1487

    
1488
    test = local_version != remote_version[0]
1489
    _ErrorIf(test, self.ENODEVERSION, node,
1490
             "incompatible protocol versions: master %s,"
1491
             " node %s", local_version, remote_version[0])
1492
    if test:
1493
      return False
1494

    
1495
    # node seems compatible, we can actually try to look into its results
1496

    
1497
    # full package version
1498
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1499
                  self.ENODEVERSION, node,
1500
                  "software version mismatch: master %s, node %s",
1501
                  constants.RELEASE_VERSION, remote_version[1],
1502
                  code=self.ETYPE_WARNING)
1503

    
1504
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1505
    if isinstance(hyp_result, dict):
1506
      for hv_name, hv_result in hyp_result.iteritems():
1507
        test = hv_result is not None
1508
        _ErrorIf(test, self.ENODEHV, node,
1509
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1510

    
1511

    
1512
    test = nresult.get(constants.NV_NODESETUP,
1513
                           ["Missing NODESETUP results"])
1514
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1515
             "; ".join(test))
1516

    
1517
    return True
1518

    
1519
  def _VerifyNodeTime(self, ninfo, nresult,
1520
                      nvinfo_starttime, nvinfo_endtime):
1521
    """Check the node time.
1522

1523
    @type ninfo: L{objects.Node}
1524
    @param ninfo: the node to check
1525
    @param nresult: the remote results for the node
1526
    @param nvinfo_starttime: the start time of the RPC call
1527
    @param nvinfo_endtime: the end time of the RPC call
1528

1529
    """
1530
    node = ninfo.name
1531
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1532

    
1533
    ntime = nresult.get(constants.NV_TIME, None)
1534
    try:
1535
      ntime_merged = utils.MergeTime(ntime)
1536
    except (ValueError, TypeError):
1537
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1538
      return
1539

    
1540
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1541
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1542
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1543
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1544
    else:
1545
      ntime_diff = None
1546

    
1547
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1548
             "Node time diverges by at least %s from master node time",
1549
             ntime_diff)
1550

    
1551
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data (volume group and physical volumes).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
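      # CheckVolumeGroupSize is expected to return None when the volume
      # group exists and is at least MIN_VG_SIZE big, or a human-readable
      # error string otherwise, which is reported verbatim below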
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
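      # Worked example (hypothetical numbers): if this node is secondary
      # for instances inst1 (1024 MB) and inst2 (2048 MB), both with
      # auto_balance enabled and both primary on node A, then losing node
      # A requires 3072 MB free here; anything less triggers ENODEN1.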
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

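    # At this point node_drbd maps each expected DRBD minor to an
    # (instance_name, should_be_active) pair, e.g. (hypothetical values)
    # {0: ("inst1.example.com", True), 1: ("ghost-inst", False)}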
    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

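    # os_dict now maps each OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples;
    # only the first entry per name is effectively used, any further
    # entries are reported as shadowed duplicates by _VerifyNodeOS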
    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
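    # hv_info is expected to be the hypervisor's node info dict; only the
    # "memory_free" entry (in MiB) is consumed here, anything else in the
    # dict is ignored by the verification code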
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

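    # node_verify_param is the request sent to each node via the
    # call_node_verify RPC; roughly speaking, every NV_* key selects one
    # check on the node side and its value carries that check's arguments
    # (None where no arguments are needed)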
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                           all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
2200
    for instance in instancelist:
2201
      if verbose:
2202
        feedback_fn("* Verifying instance %s" % instance)
2203
      inst_config = instanceinfo[instance]
2204
      self._VerifyInstance(instance, inst_config, node_image)
2205
      inst_nodes_offline = []
2206

    
2207
      pnode = inst_config.primary_node
2208
      pnode_img = node_image[pnode]
2209
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2210
               self.ENODERPC, pnode, "instance %s, connection to"
2211
               " primary node failed", instance)
2212

    
2213
      if pnode_img.offline:
2214
        inst_nodes_offline.append(pnode)
2215

    
2216
      # If the instance is non-redundant we cannot survive losing its primary
2217
      # node, so we are not N+1 compliant. On the other hand we have no disk
2218
      # templates with more than one secondary so that situation is not well
2219
      # supported either.
2220
      # FIXME: does not support file-backed instances
2221
      if not inst_config.secondary_nodes:
2222
        i_non_redundant.append(instance)
2223
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2224
               instance, "instance has multiple secondary nodes: %s",
2225
               utils.CommaJoin(inst_config.secondary_nodes),
2226
               code=self.ETYPE_WARNING)
2227

    
2228
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2229
        i_non_a_balanced.append(instance)
2230

    
2231
      for snode in inst_config.secondary_nodes:
2232
        s_img = node_image[snode]
2233
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2234
                 "instance %s, connection to secondary node failed", instance)
2235

    
2236
        if s_img.offline:
2237
          inst_nodes_offline.append(snode)
2238

    
2239
      # warn that the instance lives on offline nodes
2240
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2241
               "instance lives on offline node(s) %s",
2242
               utils.CommaJoin(inst_nodes_offline))
2243
      # ... or ghost nodes
2244
      for node in inst_config.all_nodes:
2245
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2246
                 "instance lives on ghost node %s", node)
2247

    
2248
    feedback_fn("* Verifying orphan volumes")
2249
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2250
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2251

    
2252
    feedback_fn("* Verifying orphan instances")
2253
    self._VerifyOrphanInstances(instancelist, node_image)
2254

    
2255
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2256
      feedback_fn("* Verifying N+1 Memory redundancy")
2257
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2258

    
2259
    feedback_fn("* Other Notes")
2260
    if i_non_redundant:
2261
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2262
                  % len(i_non_redundant))
2263

    
2264
    if i_non_a_balanced:
2265
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2266
                  % len(i_non_a_balanced))
2267

    
2268
    if n_offline:
2269
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2270

    
2271
    if n_drained:
2272
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2273

    
2274
    return not self.bad
2275

    
2276
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # Manually override lu_result here, as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

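    # nv_dict now maps every (node, logical volume) pair that should exist
    # for a mirrored, running instance back to that instance object, e.g.
    # (hypothetical names) ("node1.example.com", "xenvg/disk0") -> inst1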
    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
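        # the RPC is expected to report sizes in bytes; shifting by 20 bits
        # converts to MiB, the unit used for disk.size in the configuration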
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("vg_name", None, _TMaybeString),
    ("enabled_hypervisors", None,
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
    ("uid_pool", None, _NoType),
    ("add_uids", None, _NoType),
    ("remove_uids", None, _NoType),
    ("maintain_node_health", None, _TMaybeBool),
    ("nicparams", None, _TOr(_TDict, _TNone)),
    ("drbd_helper", None, _TOr(_TString, _TNone)),
    ("default_iallocator", None, _TMaybeString),
    ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
    ("hidden_os", None, _TOr(_TListOf(\
          _TAnd(_TList,
                _TIsLength(2),
                _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
          _TNone)),
    ("blacklisted_os", None, _TOr(_TListOf(\
          _TAnd(_TList,
                _TIsLength(2),
                _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
          _TNone)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

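          # FillDict is expected to return a copy of the new cluster-level
          # defaults overridden by this NIC's own parameters, i.e. what the
          # NIC would effectively use once the change is applied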
          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
2848
    """Change the parameters of the cluster.
2849

2850
    """
2851
    if self.op.vg_name is not None:
2852
      new_volume = self.op.vg_name
2853
      if not new_volume:
2854
        new_volume = None
2855
      if new_volume != self.cfg.GetVGName():
2856
        self.cfg.SetVGName(new_volume)
2857
      else:
2858
        feedback_fn("Cluster LVM configuration already in desired"
2859
                    " state, not changing")
2860
    if self.op.drbd_helper is not None:
2861
      new_helper = self.op.drbd_helper
2862
      if not new_helper:
2863
        new_helper = None
2864
      if new_helper != self.cfg.GetDRBDHelper():
2865
        self.cfg.SetDRBDHelper(new_helper)
2866
      else:
2867
        feedback_fn("Cluster DRBD helper already in desired state,"
2868
                    " not changing")
2869
    if self.op.hvparams:
2870
      self.cluster.hvparams = self.new_hvparams
2871
    if self.op.os_hvp:
2872
      self.cluster.os_hvp = self.new_os_hvp
2873
    if self.op.enabled_hypervisors is not None:
2874
      self.cluster.hvparams = self.new_hvparams
2875
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2876
    if self.op.beparams:
2877
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2878
    if self.op.nicparams:
2879
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2880
    if self.op.osparams:
2881
      self.cluster.osparams = self.new_osp
2882

    
2883
    if self.op.candidate_pool_size is not None:
2884
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2885
      # we need to update the pool size here, otherwise the save will fail
2886
      _AdjustCandidatePool(self, [])
2887

    
2888
    if self.op.maintain_node_health is not None:
2889
      self.cluster.maintain_node_health = self.op.maintain_node_health
2890

    
2891
    if self.op.add_uids is not None:
2892
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2893

    
2894
    if self.op.remove_uids is not None:
2895
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2896

    
2897
    if self.op.uid_pool is not None:
2898
      self.cluster.uid_pool = self.op.uid_pool
2899

    
2900
    if self.op.default_iallocator is not None:
2901
      self.cluster.default_iallocator = self.op.default_iallocator
2902

    
2903
    if self.op.reserved_lvs is not None:
2904
      self.cluster.reserved_lvs = self.op.reserved_lvs
2905

    
2906
    def helper_os(aname, mods, desc):
2907
      desc += " OS list"
2908
      lst = getattr(self.cluster, aname)
2909
      for key, val in mods:
2910
        if key == constants.DDM_ADD:
2911
          if val in lst:
2912
            feedback_fn("OS %s already in %s, ignoring", val, desc)
2913
          else:
2914
            lst.append(val)
2915
        elif key == constants.DDM_REMOVE:
2916
          if val in lst:
2917
            lst.remove(val)
2918
          else:
2919
            feedback_fn("OS %s not found in %s, ignoring", val, desc)
2920
        else:
2921
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
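    # Hedged note (not in the original code): the "mods" argument is the
    # list of (action, os_name) pairs carried by the opcode, so a request
    # along the lines of
    #   [(constants.DDM_ADD, "lenny-image"), (constants.DDM_REMOVE, "etch")]
    # applied to "hidden_os" would hide the first OS and unhide the second.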

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
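
# Usage sketch (added note, based on the callers later in this module):
# node-addition code distributes the ancillary files to the not-yet
# registered node with roughly
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])
# since that node is not part of the configuration at that point.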
2975

    
2976

    
2977
class LURedistributeConfig(NoHooksLU):
2978
  """Force the redistribution of cluster configuration.
2979

2980
  This is a very simple LU.
2981

2982
  """
2983
  REQ_BGL = False
2984

    
2985
  def ExpandNames(self):
2986
    self.needed_locks = {
2987
      locking.LEVEL_NODE: locking.ALL_SET,
2988
    }
2989
    self.share_locks[locking.LEVEL_NODE] = 1
2990

    
2991
  def Exec(self, feedback_fn):
2992
    """Redistribute the configuration.
2993

2994
    """
2995
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2996
    _RedistributeAncillaryFiles(self)
2997

    
2998

    
2999
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3000
  """Sleep and poll for an instance's disk to sync.
3001

3002
  """
3003
  if not instance.disks or disks is not None and not disks:
3004
    return True
3005

    
3006
  disks = _ExpandCheckDisks(instance, disks)
3007

    
3008
  if not oneshot:
3009
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3010

    
3011
  node = instance.primary_node
3012

    
3013
  for dev in disks:
3014
    lu.cfg.SetDiskID(dev, node)
3015

    
3016
  # TODO: Convert to utils.Retry
3017

    
3018
  retries = 0
3019
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3020
  while True:
3021
    max_time = 0
3022
    done = True
3023
    cumul_degraded = False
3024
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3025
    msg = rstats.fail_msg
3026
    if msg:
3027
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3028
      retries += 1
3029
      if retries >= 10:
3030
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3031
                                 " aborting." % node)
3032
      time.sleep(6)
3033
      continue
3034
    rstats = rstats.payload
3035
    retries = 0
3036
    for i, mstat in enumerate(rstats):
3037
      if mstat is None:
3038
        lu.LogWarning("Can't compute data for node %s/%s",
3039
                           node, disks[i].iv_name)
3040
        continue
3041

    
3042
      cumul_degraded = (cumul_degraded or
3043
                        (mstat.is_degraded and mstat.sync_percent is None))
3044
      if mstat.sync_percent is not None:
3045
        done = False
3046
        if mstat.estimated_time is not None:
3047
          rem_time = ("%s remaining (estimated)" %
3048
                      utils.FormatSeconds(mstat.estimated_time))
3049
          max_time = mstat.estimated_time
3050
        else:
3051
          rem_time = "no time estimate"
3052
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3053
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3054

    
3055
    # if we're done but degraded, let's do a few small retries, to
3056
    # make sure we see a stable and not transient situation; therefore
3057
    # we force restart of the loop
3058
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3059
      logging.info("Degraded disks found, %d retries left", degr_retries)
3060
      degr_retries -= 1
3061
      time.sleep(1)
3062
      continue
3063

    
3064
    if done or oneshot:
3065
      break
3066

    
3067
    time.sleep(min(60, max_time))
3068

    
3069
  if done:
3070
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3071
  return not cumul_degraded
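
# Hedged illustration (assumption about typical callers): instance
# creation/activation paths block on resync and treat a degraded result
# as fatal, roughly
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync wait")
# while oneshot=True returns after the first successful poll instead of
# waiting for the sync to finish.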


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
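
# A minimal sketch (assumption, not original code): disk-replacement style
# checks typically test only the local storage of the device on one node,
# e.g. roughly
#   ok = _CheckDiskConsistency(self, dev, instance.primary_node,
#                              on_primary=True, ldisk=True)
# which exercises the ldisk_status == LDS_OKAY branch above.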


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("name", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("changes", _NoDefault, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the given node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
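
  # Hedged example (assumption about the storage constants): for LVM
  # physical volumes the modifiable field set is typically just the
  # allocatable flag, so an opcode would pass something like
  #   changes={constants.SF_ALLOCATABLE: False}
  # to stop new logical volumes from being placed on that PV.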


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, _NoType),
    ("secondary_ip", None, _TMaybeString),
    ("readd", False, _TBool),
    ("nodegroup", None, _TMaybeString)
    ]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.nodegroup:
      raise errors.OpPrereqError("Cannot pass a nodegroup when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      nodegroup = cfg.LookupNodeGroup(self.op.nodegroup)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   nodegroup=nodegroup)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, _TMaybeBool),
    ("offline", None, _TMaybeBool),
    ("drained", None, _TMaybeBool),
    ("auto_promote", False, _TBool),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
4158
  """Query cluster configuration.
4159

4160
  """
4161
  REQ_BGL = False
4162

    
4163
  def ExpandNames(self):
4164
    self.needed_locks = {}
4165

    
4166
  def Exec(self, feedback_fn):
4167
    """Return cluster config.
4168

4169
    """
4170
    cluster = self.cfg.GetClusterInfo()
4171
    os_hvp = {}
4172

    
4173
    # Filter just for enabled hypervisors
4174
    for os_name, hv_dict in cluster.os_hvp.items():
4175
      os_hvp[os_name] = {}
4176
      for hv_name, hv_params in hv_dict.items():
4177
        if hv_name in cluster.enabled_hypervisors:
4178
          os_hvp[os_name][hv_name] = hv_params
4179

    
4180
    # Convert ip_family to ip_version
4181
    primary_ip_version = constants.IP4_VERSION
4182
    if cluster.primary_ip_family == netutils.IP6Address.family:
4183
      primary_ip_version = constants.IP6_VERSION
4184

    
4185
    result = {
4186
      "software_version": constants.RELEASE_VERSION,
4187
      "protocol_version": constants.PROTOCOL_VERSION,
4188
      "config_version": constants.CONFIG_VERSION,
4189
      "os_api_version": max(constants.OS_API_VERSIONS),
4190
      "export_version": constants.EXPORT_VERSION,
4191
      "architecture": (platform.architecture()[0], platform.machine()),
4192
      "name": cluster.cluster_name,
4193
      "master": cluster.master_node,
4194
      "default_hypervisor": cluster.enabled_hypervisors[0],
4195
      "enabled_hypervisors": cluster.enabled_hypervisors,
4196
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4197
                        for hypervisor_name in cluster.enabled_hypervisors]),
4198
      "os_hvp": os_hvp,
4199
      "beparams": cluster.beparams,
4200
      "osparams": cluster.osparams,
4201
      "nicparams": cluster.nicparams,
4202
      "candidate_pool_size": cluster.candidate_pool_size,
4203
      "master_netdev": cluster.master_netdev,
4204
      "volume_group_name": cluster.volume_group_name,
4205
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
4206
      "file_storage_dir": cluster.file_storage_dir,
4207
      "maintain_node_health": cluster.maintain_node_health,
4208
      "ctime": cluster.ctime,
4209
      "mtime": cluster.mtime,
4210
      "uuid": cluster.uuid,
4211
      "tags": list(cluster.GetTags()),
4212
      "uid_pool": cluster.uid_pool,
4213
      "default_iallocator": cluster.default_iallocator,
4214
      "reserved_lvs": cluster.reserved_lvs,
4215
      "primary_ip_version": primary_ip_version,
4216
      }
4217

    
4218
    return result
4219

    
4220

    
4221
class LUQueryConfigValues(NoHooksLU):
4222
  """Return configuration values.
4223

4224
  """
4225
  _OP_PARAMS = [_POutputFields]
4226
  REQ_BGL = False
4227
  _FIELDS_DYNAMIC = utils.FieldSet()
4228
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229
                                  "watcher_pause", "volume_group_name")
4230

    
4231
  def CheckArguments(self):
4232
    _CheckOutputFields(static=self._FIELDS_STATIC,
4233
                       dynamic=self._FIELDS_DYNAMIC,
4234
                       selected=self.op.output_fields)
4235

    
4236
  def ExpandNames(self):
4237
    self.needed_locks = {}
4238

    
4239
  def Exec(self, feedback_fn):
4240
    """Dump a representation of the cluster config to the standard output.
4241

4242
    """
4243
    values = []
4244
    for field in self.op.output_fields:
4245
      if field == "cluster_name":
4246
        entry = self.cfg.GetClusterName()
4247
      elif field == "master_node":
4248
        entry = self.cfg.GetMasterNode()
4249
      elif field == "drain_flag":
4250
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4251
      elif field == "watcher_pause":
4252
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4253
      elif field == "volume_group_name":
4254
        entry = self.cfg.GetVGName()
4255
      else:
4256
        raise errors.ParameterError(field)
4257
      values.append(entry)
4258
    return values
4259

    
4260

    
4261
class LUActivateInstanceDisks(NoHooksLU):
4262
  """Bring up an instance's disks.
4263

4264
  """
4265
  _OP_PARAMS = [
4266
    _PInstanceName,
4267
    ("ignore_size", False, _TBool),
4268
    ]
4269
  REQ_BGL = False
4270

    
4271
  def ExpandNames(self):
4272
    self._ExpandAndLockInstance()
4273
    self.needed_locks[locking.LEVEL_NODE] = []
4274
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4275

    
4276
  def DeclareLocks(self, level):
4277
    if level == locking.LEVEL_NODE:
4278
      self._LockInstancesNodes()
4279

    
4280
  def CheckPrereq(self):
4281
    """Check prerequisites.
4282

4283
    This checks that the instance is in the cluster.
4284

4285
    """
4286
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4287
    assert self.instance is not None, \
4288
      "Cannot retrieve locked instance %s" % self.op.instance_name
4289
    _CheckNodeOnline(self, self.instance.primary_node)
4290

    
4291
  def Exec(self, feedback_fn):
4292
    """Activate the disks.
4293

4294
    """
4295
    disks_ok, disks_info = \
4296
              _AssembleInstanceDisks(self, self.instance,
4297
                                     ignore_size=self.op.ignore_size)
4298
    if not disks_ok:
4299
      raise errors.OpExecError("Cannot activate block devices")
4300

    
4301
    return disks_info
4302

    
4303

    
4304
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4305
                           ignore_size=False):
4306
  """Prepare the block devices for an instance.
4307

4308
  This sets up the block devices on all nodes.
4309

4310
  @type lu: L{LogicalUnit}
4311
  @param lu: the logical unit on whose behalf we execute
4312
  @type instance: L{objects.Instance}
4313
  @param instance: the instance for whose disks we assemble
4314
  @type disks: list of L{objects.Disk} or None
4315
  @param disks: which disks to assemble (or all, if None)
4316
  @type ignore_secondaries: boolean
4317
  @param ignore_secondaries: if true, errors on secondary nodes
4318
      won't result in an error return from the function
4319
  @type ignore_size: boolean
4320
  @param ignore_size: if true, the current known size of the disk
4321
      will not be used during the disk activation, useful for cases
4322
      when the size is wrong
4323
  @return: a tuple (status, device_info); status is False if any disk
4324
      could not be assembled, and device_info is a list of
4325
      (node, iv_name, device_path) tuples, one for each instance disk
4326

4327
  """
4328
  device_info = []
4329
  disks_ok = True
4330
  iname = instance.name
4331
  disks = _ExpandCheckDisks(instance, disks)
4332

    
4333
  # With the two-pass mechanism we try to reduce the window of
4334
  # opportunity for the race condition of switching DRBD to primary
4335
  # before the handshake has occurred, but we do not eliminate it
4336

    
4337
  # The proper fix would be to wait (with some limits) until the
4338
  # connection has been made and drbd transitions from WFConnection
4339
  # into any other network-connected state (Connected, SyncTarget,
4340
  # SyncSource, etc.)
4341

    
4342
  # 1st pass, assemble on all nodes in secondary mode
4343
  for inst_disk in disks:
4344
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4345
      if ignore_size:
4346
        node_disk = node_disk.Copy()
4347
        node_disk.UnsetSize()
4348
      lu.cfg.SetDiskID(node_disk, node)
4349
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4350
      msg = result.fail_msg
4351
      if msg:
4352
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4353
                           " (is_primary=False, pass=1): %s",
4354
                           inst_disk.iv_name, node, msg)
4355
        if not ignore_secondaries:
4356
          disks_ok = False
4357

    
4358
  # FIXME: race condition on drbd migration to primary
4359

    
4360
  # 2nd pass, do only the primary node
4361
  for inst_disk in disks:
4362
    dev_path = None
4363

    
4364
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4365
      if node != instance.primary_node:
4366
        continue
4367
      if ignore_size:
4368
        node_disk = node_disk.Copy()
4369
        node_disk.UnsetSize()
4370
      lu.cfg.SetDiskID(node_disk, node)
4371
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4372
      msg = result.fail_msg
4373
      if msg:
4374
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
4375
                           " (is_primary=True, pass=2): %s",
4376
                           inst_disk.iv_name, node, msg)
4377
        disks_ok = False
4378
      else:
4379
        dev_path = result.payload
4380

    
4381
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4382

    
4383
  # leave the disks configured for the primary node
4384
  # this is a workaround that would be fixed better by
4385
  # improving the logical/physical id handling
4386
  for disk in disks:
4387
    lu.cfg.SetDiskID(disk, instance.primary_node)
4388

    
4389
  return disks_ok, device_info
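
# Illustrative sketch only: a hypothetical helper (not used anywhere in this
# module) showing how a caller might use _AssembleInstanceDisks and the
# (node, iv_name, dev_path) tuples it returns; 'lu' and 'instance' are
# assumed to be a LogicalUnit and an objects.Instance supplied by the caller.
def _ExampleLogAssembledDisks(lu, instance):
  """Hedged example: assemble all disks and log where they are visible.

  """
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    logging.info("Disk %s of instance %s is visible on node %s as %s",
                 iv_name, instance.name, node, dev_path)
  return device_info
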
4390

    
4391

    
4392
def _StartInstanceDisks(lu, instance, force):
4393
  """Start the disks of an instance.
4394

4395
  """
4396
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4397
                                           ignore_secondaries=force)
4398
  if not disks_ok:
4399
    _ShutdownInstanceDisks(lu, instance)
4400
    if force is not None and not force:
4401
      lu.proc.LogWarning("", hint="If the message above refers to a"
4402
                         " secondary node,"
4403
                         " you can retry the operation using '--force'.")
4404
    raise errors.OpExecError("Disk consistency error")
4405

    
4406

    
4407
class LUDeactivateInstanceDisks(NoHooksLU):
4408
  """Shutdown an instance's disks.
4409

4410
  """
4411
  _OP_PARAMS = [
4412
    _PInstanceName,
4413
    ]
4414
  REQ_BGL = False
4415

    
4416
  def ExpandNames(self):
4417
    self._ExpandAndLockInstance()
4418
    self.needed_locks[locking.LEVEL_NODE] = []
4419
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4420

    
4421
  def DeclareLocks(self, level):
4422
    if level == locking.LEVEL_NODE:
4423
      self._LockInstancesNodes()
4424

    
4425
  def CheckPrereq(self):
4426
    """Check prerequisites.
4427

4428
    This checks that the instance is in the cluster.
4429

4430
    """
4431
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4432
    assert self.instance is not None, \
4433
      "Cannot retrieve locked instance %s" % self.op.instance_name
4434

    
4435
  def Exec(self, feedback_fn):
4436
    """Deactivate the disks
4437

4438
    """
4439
    instance = self.instance
4440
    _SafeShutdownInstanceDisks(self, instance)
4441

    
4442

    
4443
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4444
  """Shutdown block devices of an instance.
4445

4446
  This function checks if an instance is running, before calling
4447
  _ShutdownInstanceDisks.
4448

4449
  """
4450
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4451
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4452

    
4453

    
4454
def _ExpandCheckDisks(instance, disks):
4455
  """Return the instance disks selected by the disks list
4456

4457
  @type disks: list of L{objects.Disk} or None
4458
  @param disks: selected disks
4459
  @rtype: list of L{objects.Disk}
4460
  @return: selected instance disks to act on
4461

4462
  """
4463
  if disks is None:
4464
    return instance.disks
4465
  else:
4466
    if not set(disks).issubset(instance.disks):
4467
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4468
                                   " target instance")
4469
    return disks
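
# Illustrative sketch only: _ExpandCheckDisks either returns every disk of the
# instance (disks=None) or verifies that the requested subset really belongs
# to it.  The helper below is hypothetical; 'instance' is assumed to be an
# objects.Instance.
def _ExampleFirstDiskOnly(instance):
  """Hedged example: restrict an operation to the instance's first disk.

  """
  return _ExpandCheckDisks(instance, instance.disks[:1])
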
4470

    
4471

    
4472
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4473
  """Shutdown block devices of an instance.
4474

4475
  This does the shutdown on all nodes of the instance.
4476

4477
  If ignore_primary is true, errors on the primary node are only
4478
  logged; errors on any other node always mark the operation as failed.
4479

4480
  """
4481
  all_result = True
4482
  disks = _ExpandCheckDisks(instance, disks)
4483

    
4484
  for disk in disks:
4485
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4486
      lu.cfg.SetDiskID(top_disk, node)
4487
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4488
      msg = result.fail_msg
4489
      if msg:
4490
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4491
                      disk.iv_name, node, msg)
4492
        if not ignore_primary or node != instance.primary_node:
4493
          all_result = False
4494
  return all_result
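
# Illustrative sketch only: a hypothetical helper showing the effect of
# ignore_primary -- the disks are shut down on all nodes, but failures on the
# primary node are only logged and do not count as errors; 'lu' and
# 'instance' are assumed to be supplied by the caller.
def _ExampleShutdownDisksTolerantly(lu, instance):
  """Hedged example: shut down disks, tolerating primary-node failures.

  """
  if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
    lu.LogWarning("Some block devices of instance %s could not be shut down",
                  instance.name)
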
4495

    
4496

    
4497
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4498
  """Checks if a node has enough free memory.
4499

4500
  This function checks whether a given node has the needed amount of free
4501
  memory. If the node has less memory, or we cannot get the
4502
  information from the node, this function raises an OpPrereqError
4503
  exception.
4504

4505
  @type lu: C{LogicalUnit}
4506
  @param lu: a logical unit from which we get configuration data
4507
  @type node: C{str}
4508
  @param node: the node to check
4509
  @type reason: C{str}
4510
  @param reason: string to use in the error message
4511
  @type requested: C{int}
4512
  @param requested: the amount of memory in MiB to check for
4513
  @type hypervisor_name: C{str}
4514
  @param hypervisor_name: the hypervisor to ask for memory stats
4515
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4516
      we cannot check the node
4517

4518
  """
4519
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4520
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4521
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4522
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4523
  if not isinstance(free_mem, int):
4524
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4525
                               " was '%s'" % (node, free_mem),
4526
                               errors.ECODE_ENVIRON)
4527
  if requested > free_mem:
4528
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4529
                               " needed %s MiB, available %s MiB" %
4530
                               (node, reason, requested, free_mem),
4531
                               errors.ECODE_NORES)
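
# Illustrative sketch only: a hypothetical helper showing how a LogicalUnit
# typically derives the memory requirement from the instance's filled
# beparams before calling _CheckNodeFreeMemory; 'lu' and 'instance' are
# assumed to be supplied by the caller.
def _ExampleCheckStartupMemory(lu, instance):
  """Hedged example: verify the primary node can start the instance.

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)
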
4532

    
4533

    
4534
def _CheckNodesFreeDisk(lu, nodenames, requested):
4535
  """Checks if nodes have enough free disk space in the default VG.
4536

4537
  This function checks whether all given nodes have the needed amount of
4538
  free disk space. If any node has less disk, or we cannot get the
4539
  information from the node, this function raises an OpPrereqError
4540
  exception.
4541

4542
  @type lu: C{LogicalUnit}
4543
  @param lu: a logical unit from which we get configuration data
4544
  @type nodenames: C{list}
4545
  @param nodenames: the list of node names to check
4546
  @type requested: C{int}
4547
  @param requested: the amount of disk in MiB to check for
4548
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4549
      we cannot check the node
4550

4551
  """
4552
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4553
                                   lu.cfg.GetHypervisorType())
4554
  for node in nodenames:
4555
    info = nodeinfo[node]
4556
    info.Raise("Cannot get current information from node %s" % node,
4557
               prereq=True, ecode=errors.ECODE_ENVIRON)
4558
    vg_free = info.payload.get("vg_free", None)
4559
    if not isinstance(vg_free, int):
4560
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4561
                                 " result was '%s'" % (node, vg_free),
4562
                                 errors.ECODE_ENVIRON)
4563
    if requested > vg_free:
4564
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4565
                                 " required %d MiB, available %d MiB" %
4566
                                 (node, requested, vg_free),
4567
                                 errors.ECODE_NORES)
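
# Illustrative sketch only: a hypothetical helper checking that a set of
# candidate nodes has room for all of an instance's disks; the plain sum
# below ignores any per-template overhead that _ComputeDiskSize would add.
# 'lu', 'nodenames' and 'instance' are assumed to be supplied by the caller.
def _ExampleCheckDiskSpaceForInstance(lu, nodenames, instance):
  """Hedged example: sum the disk sizes (MiB) and check every node.

  """
  required = sum(disk.size for disk in instance.disks)
  _CheckNodesFreeDisk(lu, nodenames, required)
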
4568

    
4569

    
4570
class LUStartupInstance(LogicalUnit):
4571
  """Starts an instance.
4572

4573
  """
4574
  HPATH = "instance-start"
4575
  HTYPE = constants.HTYPE_INSTANCE
4576
  _OP_PARAMS = [
4577
    _PInstanceName,
4578
    _PForce,
4579
    ("hvparams", _EmptyDict, _TDict),
4580
    ("beparams", _EmptyDict, _TDict),
4581
    ]
4582
  REQ_BGL = False
4583

    
4584
  def CheckArguments(self):
4585
    # extra beparams
4586
    if self.op.beparams:
4587
      # fill the beparams dict
4588
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4589

    
4590
  def ExpandNames(self):
4591
    self._ExpandAndLockInstance()
4592

    
4593
  def BuildHooksEnv(self):
4594
    """Build hooks env.
4595

4596
    This runs on master, primary and secondary nodes of the instance.
4597

4598
    """
4599
    env = {
4600
      "FORCE": self.op.force,
4601
      }
4602
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4603
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4604
    return env, nl, nl
4605

    
4606
  def CheckPrereq(self):
4607
    """Check prerequisites.
4608

4609
    This checks that the instance is in the cluster.
4610

4611
    """
4612
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4613
    assert self.instance is not None, \
4614
      "Cannot retrieve locked instance %s" % self.op.instance_name
4615

    
4616
    # extra hvparams
4617
    if self.op.hvparams:
4618
      # check hypervisor parameter syntax (locally)
4619
      cluster = self.cfg.GetClusterInfo()
4620
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4621
      filled_hvp = cluster.FillHV(instance)
4622
      filled_hvp.update(self.op.hvparams)
4623
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4624
      hv_type.CheckParameterSyntax(filled_hvp)
4625
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4626

    
4627
    _CheckNodeOnline(self, instance.primary_node)
4628

    
4629
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4630
    # check bridges existence
4631
    _CheckInstanceBridgesExist(self, instance)
4632

    
4633
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4634
                                              instance.name,
4635
                                              instance.hypervisor)
4636
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4637
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4638
    if not remote_info.payload: # not running already
4639
      _CheckNodeFreeMemory(self, instance.primary_node,
4640
                           "starting instance %s" % instance.name,
4641
                           bep[constants.BE_MEMORY], instance.hypervisor)
4642

    
4643
  def Exec(self, feedback_fn):
4644
    """Start the instance.
4645

4646
    """
4647
    instance = self.instance
4648
    force = self.op.force
4649

    
4650
    self.cfg.MarkInstanceUp(instance.name)
4651

    
4652
    node_current = instance.primary_node
4653

    
4654
    _StartInstanceDisks(self, instance, force)
4655

    
4656
    result = self.rpc.call_instance_start(node_current, instance,
4657
                                          self.op.hvparams, self.op.beparams)
4658
    msg = result.fail_msg
4659
    if msg:
4660
      _ShutdownInstanceDisks(self, instance)
4661
      raise errors.OpExecError("Could not start instance: %s" % msg)
4662

    
4663

    
4664
class LURebootInstance(LogicalUnit):
4665
  """Reboot an instance.
4666

4667
  """
4668
  HPATH = "instance-reboot"
4669
  HTYPE = constants.HTYPE_INSTANCE
4670
  _OP_PARAMS = [
4671
    _PInstanceName,
4672
    ("ignore_secondaries", False, _TBool),
4673
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4674
    _PShutdownTimeout,
4675
    ]
4676
  REQ_BGL = False
4677

    
4678
  def ExpandNames(self):
4679
    self._ExpandAndLockInstance()
4680

    
4681
  def BuildHooksEnv(self):
4682
    """Build hooks env.
4683

4684
    This runs on master, primary and secondary nodes of the instance.
4685

4686
    """
4687
    env = {
4688
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4689
      "REBOOT_TYPE": self.op.reboot_type,
4690
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4691
      }
4692
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4693
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4694
    return env, nl, nl
4695

    
4696
  def CheckPrereq(self):
4697
    """Check prerequisites.
4698

4699
    This checks that the instance is in the cluster.
4700

4701
    """
4702
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4703
    assert self.instance is not None, \
4704
      "Cannot retrieve locked instance %s" % self.op.instance_name
4705

    
4706
    _CheckNodeOnline(self, instance.primary_node)
4707

    
4708
    # check bridges existence
4709
    _CheckInstanceBridgesExist(self, instance)
4710

    
4711
  def Exec(self, feedback_fn):
4712
    """Reboot the instance.
4713

4714
    """
4715
    instance = self.instance
4716
    ignore_secondaries = self.op.ignore_secondaries
4717
    reboot_type = self.op.reboot_type
4718

    
4719
    node_current = instance.primary_node
4720

    
4721
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4722
                       constants.INSTANCE_REBOOT_HARD]:
4723
      for disk in instance.disks:
4724
        self.cfg.SetDiskID(disk, node_current)
4725
      result = self.rpc.call_instance_reboot(node_current, instance,
4726
                                             reboot_type,
4727
                                             self.op.shutdown_timeout)
4728
      result.Raise("Could not reboot instance")
4729
    else:
4730
      result = self.rpc.call_instance_shutdown(node_current, instance,
4731
                                               self.op.shutdown_timeout)
4732
      result.Raise("Could not shutdown instance for full reboot")
4733
      _ShutdownInstanceDisks(self, instance)
4734
      _StartInstanceDisks(self, instance, ignore_secondaries)
4735
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4736
      msg = result.fail_msg
4737
      if msg:
4738
        _ShutdownInstanceDisks(self, instance)
4739
        raise errors.OpExecError("Could not start instance for"
4740
                                 " full reboot: %s" % msg)
4741

    
4742
    self.cfg.MarkInstanceUp(instance.name)
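
# Illustrative sketch only: a hypothetical helper naming the dispatch used in
# LURebootInstance.Exec above -- soft and hard reboots are delegated to the
# hypervisor on the primary node, while any other requested type falls back
# to a full shutdown/start cycle of the instance and its disks.
def _ExampleIsHypervisorReboot(reboot_type):
  """Hedged example: True if the reboot is handled by the hypervisor.

  """
  return reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                         constants.INSTANCE_REBOOT_HARD)
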
4743

    
4744

    
4745
class LUShutdownInstance(LogicalUnit):
4746
  """Shutdown an instance.
4747

4748
  """
4749
  HPATH = "instance-stop"
4750
  HTYPE = constants.HTYPE_INSTANCE
4751
  _OP_PARAMS = [
4752
    _PInstanceName,
4753
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4754
    ]
4755
  REQ_BGL = False
4756

    
4757
  def ExpandNames(self):
4758
    self._ExpandAndLockInstance()
4759

    
4760
  def BuildHooksEnv(self):
4761
    """Build hooks env.
4762

4763
    This runs on master, primary and secondary nodes of the instance.
4764

4765
    """
4766
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4767
    env["TIMEOUT"] = self.op.timeout
4768
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4769
    return env, nl, nl
4770

    
4771
  def CheckPrereq(self):
4772
    """Check prerequisites.
4773

4774
    This checks that the instance is in the cluster.
4775

4776
    """
4777
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4778
    assert self.instance is not None, \
4779
      "Cannot retrieve locked instance %s" % self.op.instance_name
4780
    _CheckNodeOnline(self, self.instance.primary_node)
4781

    
4782
  def Exec(self, feedback_fn):
4783
    """Shutdown the instance.
4784

4785
    """
4786
    instance = self.instance
4787
    node_current = instance.primary_node
4788
    timeout = self.op.timeout
4789
    self.cfg.MarkInstanceDown(instance.name)
4790
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4791
    msg = result.fail_msg
4792
    if msg:
4793
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4794

    
4795
    _ShutdownInstanceDisks(self, instance)
4796

    
4797

    
4798
class LUReinstallInstance(LogicalUnit):
4799
  """Reinstall an instance.
4800

4801
  """
4802
  HPATH = "instance-reinstall"
4803
  HTYPE = constants.HTYPE_INSTANCE
4804
  _OP_PARAMS = [
4805
    _PInstanceName,
4806
    ("os_type", None, _TMaybeString),
4807
    ("force_variant", False, _TBool),
4808
    ]
4809
  REQ_BGL = False
4810

    
4811
  def ExpandNames(self):
4812
    self._ExpandAndLockInstance()
4813

    
4814
  def BuildHooksEnv(self):
4815
    """Build hooks env.
4816

4817
    This runs on master, primary and secondary nodes of the instance.
4818

4819
    """
4820
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4821
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4822
    return env, nl, nl
4823

    
4824
  def CheckPrereq(self):
4825
    """Check prerequisites.
4826

4827
    This checks that the instance is in the cluster and is not running.
4828

4829
    """
4830
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4831
    assert instance is not None, \
4832
      "Cannot retrieve locked instance %s" % self.op.instance_name
4833
    _CheckNodeOnline(self, instance.primary_node)
4834

    
4835
    if instance.disk_template == constants.DT_DISKLESS:
4836
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4837
                                 self.op.instance_name,
4838
                                 errors.ECODE_INVAL)
4839
    _CheckInstanceDown(self, instance, "cannot reinstall")
4840

    
4841
    if self.op.os_type is not None:
4842
      # OS verification
4843
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4844
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4845

    
4846
    self.instance = instance
4847

    
4848
  def Exec(self, feedback_fn):
4849
    """Reinstall the instance.
4850

4851
    """
4852
    inst = self.instance
4853

    
4854
    if self.op.os_type is not None:
4855
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4856
      inst.os = self.op.os_type
4857
      self.cfg.Update(inst, feedback_fn)
4858

    
4859
    _StartInstanceDisks(self, inst, None)
4860
    try:
4861
      feedback_fn("Running the instance OS create scripts...")
4862
      # FIXME: pass debug option from opcode to backend
4863
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4864
                                             self.op.debug_level)
4865
      result.Raise("Could not install OS for instance %s on node %s" %
4866
                   (inst.name, inst.primary_node))
4867
    finally:
4868
      _ShutdownInstanceDisks(self, inst)
4869

    
4870

    
4871
class LURecreateInstanceDisks(LogicalUnit):
4872
  """Recreate an instance's missing disks.
4873

4874
  """
4875
  HPATH = "instance-recreate-disks"
4876
  HTYPE = constants.HTYPE_INSTANCE
4877
  _OP_PARAMS = [
4878
    _PInstanceName,
4879
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4880
    ]
4881
  REQ_BGL = False
4882

    
4883
  def ExpandNames(self):
4884
    self._ExpandAndLockInstance()
4885

    
4886
  def BuildHooksEnv(self):
4887
    """Build hooks env.
4888

4889
    This runs on master, primary and secondary nodes of the instance.
4890

4891
    """
4892
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4893
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4894
    return env, nl, nl
4895

    
4896
  def CheckPrereq(self):
4897
    """Check prerequisites.
4898

4899
    This checks that the instance is in the cluster and is not running.
4900

4901
    """
4902
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4903
    assert instance is not None, \
4904
      "Cannot retrieve locked instance %s" % self.op.instance_name
4905
    _CheckNodeOnline(self, instance.primary_node)
4906

    
4907
    if instance.disk_template == constants.DT_DISKLESS:
4908
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4909
                                 self.op.instance_name, errors.ECODE_INVAL)
4910
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4911

    
4912
    if not self.op.disks:
4913
      self.op.disks = range(len(instance.disks))
4914
    else:
4915
      for idx in self.op.disks:
4916
        if idx >= len(instance.disks):
4917
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4918
                                     errors.ECODE_INVAL)
4919

    
4920
    self.instance = instance
4921

    
4922
  def Exec(self, feedback_fn):
4923
    """Recreate the disks.
4924

4925
    """
4926
    to_skip = []
4927
    for idx, _ in enumerate(self.instance.disks):
4928
      if idx not in self.op.disks: # disk idx has not been passed in
4929
        to_skip.append(idx)
4930
        continue
4931

    
4932
    _CreateDisks(self, self.instance, to_skip=to_skip)
4933

    
4934

    
4935
class LURenameInstance(LogicalUnit):
4936
  """Rename an instance.
4937

4938
  """
4939
  HPATH = "instance-rename"
4940
  HTYPE = constants.HTYPE_INSTANCE
4941
  _OP_PARAMS = [
4942
    _PInstanceName,
4943
    ("new_name", _NoDefault, _TNonEmptyString),
4944
    ("ip_check", False, _TBool),
4945
    ("name_check", True, _TBool),
4946
    ]
4947

    
4948
  def CheckArguments(self):
4949
    """Check arguments.
4950

4951
    """
4952
    if self.op.ip_check and not self.op.name_check:
4953
      # TODO: make the ip check more flexible and not depend on the name check
4954
      raise errors.OpPrereqError("Cannot do ip check without a name check",
4955
                                 errors.ECODE_INVAL)
4956

    
4957
  def BuildHooksEnv(self):
4958
    """Build hooks env.
4959

4960
    This runs on master, primary and secondary nodes of the instance.
4961

4962
    """
4963
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4964
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4965
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4966
    return env, nl, nl
4967

    
4968
  def CheckPrereq(self):
4969
    """Check prerequisites.
4970

4971
    This checks that the instance is in the cluster and is not running.
4972

4973
    """
4974
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4975
                                                self.op.instance_name)
4976
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4977
    assert instance is not None
4978
    _CheckNodeOnline(self, instance.primary_node)
4979
    _CheckInstanceDown(self, instance, "cannot rename")
4980
    self.instance = instance
4981

    
4982
    new_name = self.op.new_name
4983
    if self.op.name_check:
4984
      hostname = netutils.GetHostname(name=new_name)
4985
      new_name = self.op.new_name = hostname.name
4986
      if (self.op.ip_check and
4987
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4988
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4989
                                   (hostname.ip, new_name),
4990
                                   errors.ECODE_NOTUNIQUE)
4991

    
4992
    instance_list = self.cfg.GetInstanceList()
4993
    if new_name in instance_list:
4994
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4995
                                 new_name, errors.ECODE_EXISTS)
4996

    
4997
  def Exec(self, feedback_fn):
4998
    """Reinstall the instance.
4999

5000
    """
5001
    inst = self.instance
5002
    old_name = inst.name
5003

    
5004
    if inst.disk_template == constants.DT_FILE:
5005
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5006

    
5007
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5008
    # Change the instance lock. This is definitely safe while we hold the BGL
5009
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5010
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5011

    
5012
    # re-read the instance from the configuration after rename
5013
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5014

    
5015
    if inst.disk_template == constants.DT_FILE:
5016
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5017
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5018
                                                     old_file_storage_dir,
5019
                                                     new_file_storage_dir)
5020
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5021
                   " (but the instance has been renamed in Ganeti)" %
5022
                   (inst.primary_node, old_file_storage_dir,
5023
                    new_file_storage_dir))
5024

    
5025
    _StartInstanceDisks(self, inst, None)
5026
    try:
5027
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5028
                                                 old_name, self.op.debug_level)
5029
      msg = result.fail_msg
5030
      if msg:
5031
        msg = ("Could not run OS rename script for instance %s on node %s"
5032
               " (but the instance has been renamed in Ganeti): %s" %
5033
               (inst.name, inst.primary_node, msg))
5034
        self.proc.LogWarning(msg)
5035
    finally:
5036
      _ShutdownInstanceDisks(self, inst)
5037

    
5038
    return inst.name
5039

    
5040

    
5041
class LURemoveInstance(LogicalUnit):
5042
  """Remove an instance.
5043

5044
  """
5045
  HPATH = "instance-remove"
5046
  HTYPE = constants.HTYPE_INSTANCE
5047
  _OP_PARAMS = [
5048
    _PInstanceName,
5049
    ("ignore_failures", False, _TBool),
5050
    _PShutdownTimeout,
5051
    ]
5052
  REQ_BGL = False
5053

    
5054
  def ExpandNames(self):
5055
    self._ExpandAndLockInstance()
5056
    self.needed_locks[locking.LEVEL_NODE] = []
5057
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5058

    
5059
  def DeclareLocks(self, level):
5060
    if level == locking.LEVEL_NODE:
5061
      self._LockInstancesNodes()
5062

    
5063
  def BuildHooksEnv(self):
5064
    """Build hooks env.
5065

5066
    This runs on master, primary and secondary nodes of the instance.
5067

5068
    """
5069
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5070
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5071
    nl = [self.cfg.GetMasterNode()]
5072
    nl_post = list(self.instance.all_nodes) + nl
5073
    return env, nl, nl_post
5074

    
5075
  def CheckPrereq(self):
5076
    """Check prerequisites.
5077

5078
    This checks that the instance is in the cluster.
5079

5080
    """
5081
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5082
    assert self.instance is not None, \
5083
      "Cannot retrieve locked instance %s" % self.op.instance_name
5084

    
5085
  def Exec(self, feedback_fn):
5086
    """Remove the instance.
5087

5088
    """
5089
    instance = self.instance
5090
    logging.info("Shutting down instance %s on node %s",
5091
                 instance.name, instance.primary_node)
5092

    
5093
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5094
                                             self.op.shutdown_timeout)
5095
    msg = result.fail_msg
5096
    if msg:
5097
      if self.op.ignore_failures:
5098
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5099
      else:
5100
        raise errors.OpExecError("Could not shutdown instance %s on"
5101
                                 " node %s: %s" %
5102
                                 (instance.name, instance.primary_node, msg))
5103

    
5104
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5105

    
5106

    
5107
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5108
  """Utility function to remove an instance.
5109

5110
  """
5111
  logging.info("Removing block devices for instance %s", instance.name)
5112

    
5113
  if not _RemoveDisks(lu, instance):
5114
    if not ignore_failures:
5115
      raise errors.OpExecError("Can't remove instance's disks")
5116
    feedback_fn("Warning: can't remove instance's disks")
5117

    
5118
  logging.info("Removing instance %s out of cluster config", instance.name)
5119

    
5120
  lu.cfg.RemoveInstance(instance.name)
5121

    
5122
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5123
    "Instance lock removal conflict"
5124

    
5125
  # Remove lock for the instance
5126
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5127

    
5128

    
5129
class LUQueryInstances(NoHooksLU):
5130
  """Logical unit for querying instances.
5131

5132
  """
5133
  # pylint: disable-msg=W0142
5134
  _OP_PARAMS = [
5135
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5136
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5137
    ("use_locking", False, _TBool),
5138
    ]
5139
  REQ_BGL = False
5140
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5141
                    "serial_no", "ctime", "mtime", "uuid"]
5142
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5143
                                    "admin_state",
5144
                                    "disk_template", "ip", "mac", "bridge",
5145
                                    "nic_mode", "nic_link",
5146
                                    "sda_size", "sdb_size", "vcpus", "tags",
5147
                                    "network_port", "beparams",
5148
                                    r"(disk)\.(size)/([0-9]+)",
5149
                                    r"(disk)\.(sizes)", "disk_usage",
5150
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5151
                                    r"(nic)\.(bridge)/([0-9]+)",
5152
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
5153
                                    r"(disk|nic)\.(count)",
5154
                                    "hvparams", "custom_hvparams",
5155
                                    "custom_beparams", "custom_nicparams",
5156
                                    ] + _SIMPLE_FIELDS +
5157
                                  ["hv/%s" % name
5158
                                   for name in constants.HVS_PARAMETERS
5159
                                   if name not in constants.HVC_GLOBALS] +
5160
                                  ["be/%s" % name
5161
                                   for name in constants.BES_PARAMETERS])
5162
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5163
                                   "oper_ram",
5164
                                   "oper_vcpus",
5165
                                   "status")
5166

    
5167

    
5168
  def CheckArguments(self):
5169
    _CheckOutputFields(static=self._FIELDS_STATIC,
5170
                       dynamic=self._FIELDS_DYNAMIC,
5171
                       selected=self.op.output_fields)
5172

    
5173
  def ExpandNames(self):
5174
    self.needed_locks = {}
5175
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5176
    self.share_locks[locking.LEVEL_NODE] = 1
5177

    
5178
    if self.op.names:
5179
      self.wanted = _GetWantedInstances(self, self.op.names)
5180
    else:
5181
      self.wanted = locking.ALL_SET
5182

    
5183
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5184
    self.do_locking = self.do_node_query and self.op.use_locking
5185
    if self.do_locking:
5186
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5187
      self.needed_locks[locking.LEVEL_NODE] = []
5188
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5189

    
5190
  def DeclareLocks(self, level):
5191
    if level == locking.LEVEL_NODE and self.do_locking:
5192
      self._LockInstancesNodes()
5193

    
5194
  def Exec(self, feedback_fn):
5195
    """Computes the list of nodes and their attributes.
5196

5197
    """
5198
    # pylint: disable-msg=R0912
5199
    # way too many branches here
5200
    all_info = self.cfg.GetAllInstancesInfo()
5201
    if self.wanted == locking.ALL_SET:
5202
      # caller didn't specify instance names, so ordering is not important
5203
      if self.do_locking:
5204
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5205
      else:
5206
        instance_names = all_info.keys()
5207
      instance_names = utils.NiceSort(instance_names)
5208
    else:
5209
      # caller did specify names, so we must keep the ordering
5210
      if self.do_locking:
5211
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5212
      else:
5213
        tgt_set = all_info.keys()
5214
      missing = set(self.wanted).difference(tgt_set)
5215
      if missing:
5216
        raise errors.OpExecError("Some instances were removed before"
5217
                                 " retrieving their data: %s" % missing)
5218
      instance_names = self.wanted
5219

    
5220
    instance_list = [all_info[iname] for iname in instance_names]
5221

    
5222
    # begin data gathering
5223

    
5224
    nodes = frozenset([inst.primary_node for inst in instance_list])
5225
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
5226

    
5227
    bad_nodes = []
5228
    off_nodes = []
5229
    if self.do_node_query:
5230
      live_data = {}
5231
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5232
      for name in nodes:
5233
        result = node_data[name]
5234
        if result.offline:
5235
          # offline nodes will be in both lists
5236
          off_nodes.append(name)
5237
        if result.fail_msg:
5238
          bad_nodes.append(name)
5239
        else:
5240
          if result.payload:
5241
            live_data.update(result.payload)
5242
          # else no instance is alive
5243
    else:
5244
      live_data = dict([(name, {}) for name in instance_names])
5245

    
5246
    # end data gathering
5247

    
5248
    HVPREFIX = "hv/"
5249
    BEPREFIX = "be/"
5250
    output = []
5251
    cluster = self.cfg.GetClusterInfo()
5252
    for instance in instance_list:
5253
      iout = []
5254
      i_hv = cluster.FillHV(instance, skip_globals=True)
5255
      i_be = cluster.FillBE(instance)
5256
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5257
      for field in self.op.output_fields:
5258
        st_match = self._FIELDS_STATIC.Matches(field)
5259
        if field in self._SIMPLE_FIELDS:
5260
          val = getattr(instance, field)
5261
        elif field == "pnode":
5262
          val = instance.primary_node
5263
        elif field == "snodes":
5264
          val = list(instance.secondary_nodes)
5265
        elif field == "admin_state":
5266
          val = instance.admin_up
5267
        elif field == "oper_state":
5268
          if instance.primary_node in bad_nodes:
5269
            val = None
5270
          else:
5271
            val = bool(live_data.get(instance.name))
5272
        elif field == "status":
5273
          if instance.primary_node in off_nodes:
5274
            val = "ERROR_nodeoffline"
5275
          elif instance.primary_node in bad_nodes:
5276
            val = "ERROR_nodedown"
5277
          else:
5278
            running = bool(live_data.get(instance.name))
5279
            if running:
5280
              if instance.admin_up:
5281
                val = "running"
5282
              else:
5283
                val = "ERROR_up"
5284
            else:
5285
              if instance.admin_up:
5286
                val = "ERROR_down"
5287
              else:
5288
                val = "ADMIN_down"
5289
        elif field == "oper_ram":
5290
          if instance.primary_node in bad_nodes:
5291
            val = None
5292
          elif instance.name in live_data:
5293
            val = live_data[instance.name].get("memory", "?")
5294
          else:
5295
            val = "-"
5296
        elif field == "oper_vcpus":
5297
          if instance.primary_node in bad_nodes:
5298
            val = None
5299
          elif instance.name in live_data:
5300
            val = live_data[instance.name].get("vcpus", "?")
5301
          else:
5302
            val = "-"
5303
        elif field == "vcpus":
5304
          val = i_be[constants.BE_VCPUS]
5305
        elif field == "disk_template":
5306
          val = instance.disk_template
5307
        elif field == "ip":
5308
          if instance.nics:
5309
            val = instance.nics[0].ip
5310
          else:
5311
            val = None
5312
        elif field == "nic_mode":
5313
          if instance.nics:
5314
            val = i_nicp[0][constants.NIC_MODE]
5315
          else:
5316
            val = None
5317
        elif field == "nic_link":
5318
          if instance.nics:
5319
            val = i_nicp[0][constants.NIC_LINK]
5320
          else:
5321
            val = None
5322
        elif field == "bridge":
5323
          if (instance.nics and
5324
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5325
            val = i_nicp[0][constants.NIC_LINK]
5326
          else:
5327
            val = None
5328
        elif field == "mac":
5329
          if instance.nics:
5330
            val = instance.nics[0].mac
5331
          else:
5332
            val = None
5333
        elif field == "custom_nicparams":
5334
          val = [nic.nicparams for nic in instance.nics]
5335
        elif field == "sda_size" or field == "sdb_size":
5336
          idx = ord(field[2]) - ord('a')
5337
          try:
5338
            val = instance.FindDisk(idx).size
5339
          except errors.OpPrereqError:
5340
            val = None
5341
        elif field == "disk_usage": # total disk usage per node
5342
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5343
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5344
        elif field == "tags":
5345
          val = list(instance.GetTags())
5346
        elif field == "custom_hvparams":
5347
          val = instance.hvparams # not filled!
5348
        elif field == "hvparams":
5349
          val = i_hv
5350
        elif (field.startswith(HVPREFIX) and
5351
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5352
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5353
          val = i_hv.get(field[len(HVPREFIX):], None)
5354
        elif field == "custom_beparams":
5355
          val = instance.beparams
5356
        elif field == "beparams":
5357
          val = i_be
5358
        elif (field.startswith(BEPREFIX) and
5359
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5360
          val = i_be.get(field[len(BEPREFIX):], None)
5361
        elif st_match and st_match.groups():
5362
          # matches a variable list
5363
          st_groups = st_match.groups()
5364
          if st_groups and st_groups[0] == "disk":
5365
            if st_groups[1] == "count":
5366
              val = len(instance.disks)
5367
            elif st_groups[1] == "sizes":
5368
              val = [disk.size for disk in instance.disks]
5369
            elif st_groups[1] == "size":
5370
              try:
5371
                val = instance.FindDisk(st_groups[2]).size
5372
              except errors.OpPrereqError:
5373
                val = None
5374
            else:
5375
              assert False, "Unhandled disk parameter"
5376
          elif st_groups[0] == "nic":
5377
            if st_groups[1] == "count":
5378
              val = len(instance.nics)
5379
            elif st_groups[1] == "macs":
5380
              val = [nic.mac for nic in instance.nics]
5381
            elif st_groups[1] == "ips":
5382
              val = [nic.ip for nic in instance.nics]
5383
            elif st_groups[1] == "modes":
5384
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5385
            elif st_groups[1] == "links":
5386
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5387
            elif st_groups[1] == "bridges":
5388
              val = []
5389
              for nicp in i_nicp:
5390
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5391
                  val.append(nicp[constants.NIC_LINK])
5392
                else:
5393
                  val.append(None)
5394
            else:
5395
              # index-based item
5396
              nic_idx = int(st_groups[2])
5397
              if nic_idx >= len(instance.nics):
5398
                val = None
5399
              else:
5400
                if st_groups[1] == "mac":
5401
                  val = instance.nics[nic_idx].mac
5402
                elif st_groups[1] == "ip":
5403
                  val = instance.nics[nic_idx].ip
5404
                elif st_groups[1] == "mode":
5405
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5406
                elif st_groups[1] == "link":
5407
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5408
                elif st_groups[1] == "bridge":
5409
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5410
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5411
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5412
                  else:
5413
                    val = None
5414
                else:
5415
                  assert False, "Unhandled NIC parameter"
5416
          else:
5417
            assert False, ("Declared but unhandled variable parameter '%s'" %
5418
                           field)
5419
        else:
5420
          assert False, "Declared but unhandled parameter '%s'" % field
5421
        iout.append(val)
5422
      output.append(iout)
5423

    
5424
    return output
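
# Illustrative sketch only: the rows returned by LUQueryInstances.Exec above
# are parallel to op.output_fields, so a client can rebuild per-instance
# dictionaries from them; the helper is hypothetical and 'output_fields' and
# 'rows' are assumed to come from such a query.
def _ExampleQueryRowsToDicts(output_fields, rows):
  """Hedged example: turn query result rows into field->value mappings.

  """
  return [dict(zip(output_fields, row)) for row in rows]
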
5425

    
5426

    
5427
class LUFailoverInstance(LogicalUnit):
5428
  """Failover an instance.
5429

5430
  """
5431
  HPATH = "instance-failover"
5432
  HTYPE = constants.HTYPE_INSTANCE
5433
  _OP_PARAMS = [
5434
    _PInstanceName,
5435
    ("ignore_consistency", False, _TBool),
5436
    _PShutdownTimeout,
5437
    ]
5438
  REQ_BGL = False
5439

    
5440
  def ExpandNames(self):
5441
    self._ExpandAndLockInstance()
5442
    self.needed_locks[locking.LEVEL_NODE] = []
5443
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5444

    
5445
  def DeclareLocks(self, level):
5446
    if level == locking.LEVEL_NODE:
5447
      self._LockInstancesNodes()
5448

    
5449
  def BuildHooksEnv(self):
5450
    """Build hooks env.
5451

5452
    This runs on master, primary and secondary nodes of the instance.
5453

5454
    """
5455
    instance = self.instance
5456
    source_node = instance.primary_node
5457
    target_node = instance.secondary_nodes[0]
5458
    env = {
5459
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5460
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5461
      "OLD_PRIMARY": source_node,
5462
      "OLD_SECONDARY": target_node,
5463
      "NEW_PRIMARY": target_node,
5464
      "NEW_SECONDARY": source_node,
5465
      }
5466
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5467
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5468
    nl_post = list(nl)
5469
    nl_post.append(source_node)
5470
    return env, nl, nl_post
5471

    
5472
  def CheckPrereq(self):
5473
    """Check prerequisites.
5474

5475
    This checks that the instance is in the cluster.
5476

5477
    """
5478
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5479
    assert self.instance is not None, \
5480
      "Cannot retrieve locked instance %s" % self.op.instance_name
5481

    
5482
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5483
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5484
      raise errors.OpPrereqError("Instance's disk layout is not"
5485
                                 " network mirrored, cannot failover.",
5486
                                 errors.ECODE_STATE)
5487

    
5488
    secondary_nodes = instance.secondary_nodes
5489
    if not secondary_nodes:
5490
      raise errors.ProgrammerError("no secondary node but using "
5491
                                   "a mirrored disk template")
5492

    
5493
    target_node = secondary_nodes[0]
5494
    _CheckNodeOnline(self, target_node)
5495
    _CheckNodeNotDrained(self, target_node)
5496
    if instance.admin_up:
5497
      # check memory requirements on the secondary node
5498
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5499
                           instance.name, bep[constants.BE_MEMORY],
5500
                           instance.hypervisor)
5501
    else:
5502
      self.LogInfo("Not checking memory on the secondary node as"
5503
                   " instance will not be started")
5504

    
5505
    # check bridge existence
5506
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5507

    
5508
  def Exec(self, feedback_fn):
5509
    """Failover an instance.
5510

5511
    The failover is done by shutting it down on its present node and
5512
    starting it on the secondary.
5513

5514
    """
5515
    instance = self.instance
5516
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5517

    
5518
    source_node = instance.primary_node
5519
    target_node = instance.secondary_nodes[0]
5520

    
5521
    if instance.admin_up:
5522
      feedback_fn("* checking disk consistency between source and target")
5523
      for dev in instance.disks:
5524
        # for drbd, these are drbd over lvm
5525
        if not _CheckDiskConsistency(self, dev, target_node, False):
5526
          if not self.op.ignore_consistency:
5527
            raise errors.OpExecError("Disk %s is degraded on target node,"
5528
                                     " aborting failover." % dev.iv_name)
5529
    else:
5530
      feedback_fn("* not checking disk consistency as instance is not running")
5531

    
5532
    feedback_fn("* shutting down instance on source node")
5533
    logging.info("Shutting down instance %s on node %s",
5534
                 instance.name, source_node)
5535

    
5536
    result = self.rpc.call_instance_shutdown(source_node, instance,
5537
                                             self.op.shutdown_timeout)
5538
    msg = result.fail_msg
5539
    if msg:
5540
      if self.op.ignore_consistency or primary_node.offline:
5541
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5542
                             " Proceeding anyway. Please make sure node"
5543
                             " %s is down. Error details: %s",
5544
                             instance.name, source_node, source_node, msg)
5545
      else:
5546
        raise errors.OpExecError("Could not shutdown instance %s on"
5547
                                 " node %s: %s" %
5548
                                 (instance.name, source_node, msg))
5549

    
5550
    feedback_fn("* deactivating the instance's disks on source node")
5551
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5552
      raise errors.OpExecError("Can't shut down the instance's disks.")
5553

    
5554
    instance.primary_node = target_node
5555
    # distribute new instance config to the other nodes
5556
    self.cfg.Update(instance, feedback_fn)
5557

    
5558
    # Only start the instance if it's marked as up
5559
    if instance.admin_up:
5560
      feedback_fn("* activating the instance's disks on target node")
5561
      logging.info("Starting instance %s on node %s",
5562
                   instance.name, target_node)
5563

    
5564
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5565
                                           ignore_secondaries=True)
5566
      if not disks_ok:
5567
        _ShutdownInstanceDisks(self, instance)
5568
        raise errors.OpExecError("Can't activate the instance's disks")
5569

    
5570
      feedback_fn("* starting the instance on the target node")
5571
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5572
      msg = result.fail_msg
5573
      if msg:
5574
        _ShutdownInstanceDisks(self, instance)
5575
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5576
                                 (instance.name, target_node, msg))
5577

    
5578

    
5579
class LUMigrateInstance(LogicalUnit):
5580
  """Migrate an instance.
5581

5582
  This is migration without shutting down, as opposed to failover,
5583
  which is done with a shutdown.
5584

5585
  """
5586
  HPATH = "instance-migrate"
5587
  HTYPE = constants.HTYPE_INSTANCE
5588
  _OP_PARAMS = [
5589
    _PInstanceName,
5590
    _PMigrationMode,
5591
    _PMigrationLive,
5592
    ("cleanup", False, _TBool),
5593
    ]
5594

    
5595
  REQ_BGL = False
5596

    
5597
  def ExpandNames(self):
5598
    self._ExpandAndLockInstance()
5599

    
5600
    self.needed_locks[locking.LEVEL_NODE] = []
5601
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5602

    
5603
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5604
                                       self.op.cleanup)
5605
    self.tasklets = [self._migrater]
5606

    
5607
  def DeclareLocks(self, level):
5608
    if level == locking.LEVEL_NODE:
5609
      self._LockInstancesNodes()
5610

    
5611
  def BuildHooksEnv(self):
5612
    """Build hooks env.
5613

5614
    This runs on master, primary and secondary nodes of the instance.
5615

5616
    """
5617
    instance = self._migrater.instance
5618
    source_node = instance.primary_node
5619
    target_node = instance.secondary_nodes[0]
5620
    env = _BuildInstanceHookEnvByObject(self, instance)
5621
    env["MIGRATE_LIVE"] = self._migrater.live
5622
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5623
    env.update({
5624
        "OLD_PRIMARY": source_node,
5625
        "OLD_SECONDARY": target_node,
5626
        "NEW_PRIMARY": target_node,
5627
        "NEW_SECONDARY": source_node,
5628
        })
5629
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5630
    nl_post = list(nl)
5631
    nl_post.append(source_node)
5632
    return env, nl, nl_post
5633

    
5634

    
5635
class LUMoveInstance(LogicalUnit):
5636
  """Move an instance by data-copying.
5637

5638
  """
5639
  HPATH = "instance-move"
5640
  HTYPE = constants.HTYPE_INSTANCE
5641
  _OP_PARAMS = [
5642
    _PInstanceName,
5643
    ("target_node", _NoDefault, _TNonEmptyString),
5644
    _PShutdownTimeout,
5645
    ]
5646
  REQ_BGL = False
5647

    
5648
  def ExpandNames(self):
5649
    self._ExpandAndLockInstance()
5650
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5651
    self.op.target_node = target_node
5652
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5653
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5654

    
5655
  def DeclareLocks(self, level):
5656
    if level == locking.LEVEL_NODE:
5657
      self._LockInstancesNodes(primary_only=True)
5658

    
5659
  def BuildHooksEnv(self):
5660
    """Build hooks env.
5661

5662
    This runs on master, primary and secondary nodes of the instance.
5663

5664
    """
5665
    env = {
5666
      "TARGET_NODE": self.op.target_node,
5667
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5668
      }
5669
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5670
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5671
                                       self.op.target_node]
5672
    return env, nl, nl
5673

    
5674
  def CheckPrereq(self):
5675
    """Check prerequisites.
5676

5677
    This checks that the instance is in the cluster.
5678

5679
    """
5680
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5681
    assert self.instance is not None, \
5682
      "Cannot retrieve locked instance %s" % self.op.instance_name
5683

    
5684
    node = self.cfg.GetNodeInfo(self.op.target_node)
5685
    assert node is not None, \
5686
      "Cannot retrieve locked node %s" % self.op.target_node
5687

    
5688
    self.target_node = target_node = node.name
5689

    
5690
    if target_node == instance.primary_node:
5691
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5692
                                 (instance.name, target_node),
5693
                                 errors.ECODE_STATE)
5694

    
5695
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5696

    
5697
    for idx, dsk in enumerate(instance.disks):
5698
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5699
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5700
                                   " cannot copy" % idx, errors.ECODE_STATE)
5701

    
5702
    _CheckNodeOnline(self, target_node)
5703
    _CheckNodeNotDrained(self, target_node)
5704

    
5705
    if instance.admin_up:
5706
      # check memory requirements on the target node
5707
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5708
                           instance.name, bep[constants.BE_MEMORY],
5709
                           instance.hypervisor)
5710
    else:
5711
      self.LogInfo("Not checking memory on the secondary node as"
5712
                   " instance will not be started")
5713

    
5714
    # check bridge existence
5715
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)
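    # From this point on the configuration says the instance lives on the
    # target node; the now-unused disks on the source node are removed next,
    # and the instance is only restarted if it was marked as up.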

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = tasklets
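    # The LU machinery runs CheckPrereq/Exec for each tasklet in turn, so a
    # node migration is effectively a series of single-instance migrations
    # using the TLMigrateInstance code path defined below.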

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

  """
  def __init__(self, lu, instance_name, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance
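    # Exactly one of 'live' and 'mode' may be given; below, 'live' is
    # translated into the equivalent mode (and then cleared so repeated
    # CheckPrereq runs stay valid), and if neither was given the
    # hypervisor's default migration mode is used.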

    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters is accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
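    # Within this module the tasklet is created with cleanup=False for a
    # normal migration (see LUMigrateNode above); creating it with
    # cleanup=True instead makes Exec() run _ExecCleanup() to recover the
    # configuration after a previously interrupted migration.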


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)
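  # The recursion above creates all children first, so e.g. a DRBD8 device
  # is only created once its data and metadata LVs exist on this node.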

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate one logical volume name for each of the given
  extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
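  # The fixed 128 MiB metadata LV below is the same per-disk overhead that
  # _ComputeDiskSize() adds for the DRBD8 template.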
6322
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6323
                          logical_id=(vgname, names[1]))
6324
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6325
                          logical_id=(primary, secondary, port,
6326
                                      p_minor, s_minor,
6327
                                      shared_secret),
6328
                          children=[dev_data, dev_meta],
6329
                          iv_name=iv_name)
6330
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
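    # Example: for two disks and base_index 0 this yields the pairs
    # "<unique-id>.disk0_data"/"<unique-id>.disk0_meta" and
    # "<unique-id>.disk1_data"/"<unique-id>.disk1_meta", i.e. one shared
    # prefix per disk for its data and metadata LVs.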
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }
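  # Worked example: a DRBD8 instance with two 10240 MiB disks needs
  # 2 * (10240 + 128) = 20736 MiB of free space in the volume group, while
  # diskless and file-backed instances need none (hence the None entries).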

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
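  # Both _CheckHVParams and _CheckOSParams follow the same pattern: run a
  # validation RPC against a list of nodes and turn any per-node failure
  # into an exception via Raise(), so instance creation and modification
  # can call them directly from their prerequisite checks.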


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, _TBool),
    ("wait_for_sync", True, _TBool),
    ("ip_check", True, _TBool),
    ("name_check", True, _TBool),
    ("disks", _NoDefault, _TListOf(_TDict)),
    ("nics", _NoDefault, _TListOf(_TDict)),
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ("osparams", _EmptyDict, _TDict),
    ("no_install", None, _TMaybeBool),
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ("source_handshake", None, _TOr(_TList, _TNone)),
    ("source_x509_ca", None, _TMaybeString),
    ("source_instance_name", None, _TMaybeString),
    ("src_node", None, _TMaybeString),
    ("src_path", None, _TMaybeString),
    ("pnode", None, _TMaybeString),
    ("snode", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("hypervisor", None, _TMaybeString),
    ("disk_template", _NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, _TBool),
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
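    # Summary: adoption is an all-or-nothing choice per request, is only
    # supported for the disk templates in DTS_MAY_ADOPT, and cannot be
    # combined with an iallocator run or an instance import (all verified
    # above).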

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
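    # From here on pnode (and snode, when the allocator had to return two
    # nodes, as for mirrored templates) are filled in, so the remaining
    # checks proceed exactly as if the user had named the nodes explicitly.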

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
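    # Net effect: values equal to the current cluster defaults are dropped
    # from the opcode, so they are not stored as per-instance overrides and
    # the instance keeps following future changes to those defaults.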
7030

    
7031
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
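      # each lv_list payload entry maps an LV name to an attribute tuple; the
      # code below relies on index 0 being the size and index 2 the online flag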
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

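    # only hypervisor types listed in HTS_REQ_PORT need a network port (used
    # e.g. for console access); for those we reserve a cluster-unique port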
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

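    # decide whether the new disks are usable: either wait for the full sync,
    # or (for mirrored templates) just check that the mirrors are not degraded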
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

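        # build one DiskTransfer per dump file in the export; disks without a
        # dump (False entries in src_images) are skipped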
        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

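    # instance_list returns the instances actually running on the node, so a
    # missing entry means the instance is not running there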
    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("early_release", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
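      # the second (128 MiB) LV holds the DRBD metadata for the renewed disk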
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

8141
    """Replace the secondary node for DRBD 8.
8142

8143
    The algorithm for replace is quite complicated:
8144
      - for all disks of the instance:
8145
        - create new LVs on the new node with same names
8146
        - shutdown the drbd device on the old secondary
8147
        - disconnect the drbd network on the primary
8148
        - create the drbd device on the new secondary
8149
        - network attach the drbd on the primary, using an artifice:
8150
          the drbd code for Attach() will connect to the network if it
8151
          finds a device which is connected to the good local disks but
8152
          not network enabled
8153
      - wait for sync across all devices
8154
      - remove all disks from the old secondary
8155

8156
    Failures are not very well handled.
8157

8158
    """
8159
    steps_total = 6
8160

    
8161
    # Step: check device activation
8162
    self.lu.LogStep(1, steps_total, "Check device existence")
8163
    self._CheckDisksExistence([self.instance.primary_node])
8164
    self._CheckVolumeGroup([self.instance.primary_node])
8165

    
8166
    # Step: check other node consistency
8167
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8168
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8169

    
8170
    # Step: create new storage
8171
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8172
    for idx, dev in enumerate(self.instance.disks):
8173
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8174
                      (self.new_node, idx))
8175
      # we pass force_create=True to force LVM creation
8176
      for new_lv in dev.children:
8177
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8178
                        _GetInstanceInfoText(self.instance), False)
8179

    
8180
    # Step 4: dbrd minors and drbd setups changes
8181
    # after this, we must manually remove the drbd minors on both the
8182
    # error and the success paths
8183
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8184
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8185
                                         for dev in self.instance.disks],
8186
                                        self.instance.name)
8187
    logging.debug("Allocated minors %r", minors)
8188

    
8189
    iv_names = {}
8190
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8191
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8192
                      (self.new_node, idx))
8193
      # create new devices on new_node; note that we create two IDs:
8194
      # one without port, so the drbd will be activated without
8195
      # networking information on the new node at this stage, and one
8196
      # with network, for the latter activation in step 4
8197
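      # (a DRBD8 logical_id is a (node_a, node_b, port, minor_a, minor_b,
      # secret) tuple, unpacked below)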
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
8300
  """Repairs the volume group on a node.
8301

8302
  """
8303
  _OP_PARAMS = [
8304
    _PNodeName,
8305
    ("storage_type", _NoDefault, _CheckStorageType),
8306
    ("name", _NoDefault, _TNonEmptyString),
8307
    ("ignore_consistency", False, _TBool),
8308
    ]
8309
  REQ_BGL = False
8310

    
8311
  def CheckArguments(self):
8312
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8313

    
8314
    storage_type = self.op.storage_type
8315

    
8316
    if (constants.SO_FIX_CONSISTENCY not in
8317
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8318
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8319
                                 " repaired" % storage_type,
8320
                                 errors.ECODE_INVAL)
8321

    
8322
  def ExpandNames(self):
8323
    self.needed_locks = {
8324
      locking.LEVEL_NODE: [self.op.node_name],
8325
      }
8326

    
8327
  def _CheckFaultyDisks(self, instance, node_name):
8328
    """Ensure faulty disks abort the opcode or at least warn."""
8329
    try:
8330
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8331
                                  node_name, True):
8332
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8333
                                   " node '%s'" % (instance.name, node_name),
8334
                                   errors.ECODE_STATE)
8335
    except errors.OpPrereqError, err:
8336
      if self.op.ignore_consistency:
8337
        self.proc.LogWarning(str(err.args[0]))
8338
      else:
8339
        raise
8340

    
8341
  def CheckPrereq(self):
8342
    """Check prerequisites.
8343

8344
    """
8345
    # Check whether any instance on this node has faulty disks
8346
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8347
      if not inst.admin_up:
8348
        continue
8349
      check_nodes = set(inst.all_nodes)
8350
      check_nodes.discard(self.op.node_name)
8351
      for inst_node_name in check_nodes:
8352
        self._CheckFaultyDisks(inst, inst_node_name)
8353

    
8354
  def Exec(self, feedback_fn):
8355
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8356
                (self.op.name, self.op.node_name))
8357

    
8358
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8359
    result = self.rpc.call_storage_execute(self.op.node_name,
8360
                                           self.op.storage_type, st_args,
8361
                                           self.op.name,
8362
                                           constants.SO_FIX_CONSISTENCY)
8363
    result.Raise("Failed to repair storage unit '%s' on %s" %
8364
                 (self.op.name, self.op.node_name))

    
8366

    
8367
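# Editor's note: the block below is an illustrative sketch added for
# documentation only; it is not part of the original module and the helper
# name is invented. The _OP_PARAMS entries used by the LUs in this file are
# (name, default, check) triples; the sketch approximates how such a triple
# is meant to be applied to an opcode: fill in the default (calling it if it
# is a factory such as _EmptyList), reject missing required values, and run
# the check callable on the final value.
def _SketchApplyOpParams(op_values, op_params):
  """Fills defaults and type-checks opcode values (illustrative sketch).

  """
  for (pname, pdefault, pcheck) in op_params:
    if pname not in op_values:
      if pdefault is _NoDefault:
        raise errors.OpPrereqError("Required parameter '%s' missing" % pname,
                                   errors.ECODE_INVAL)
      if callable(pdefault):
        # modifiable defaults are stored as factories returning fresh values
        pdefault = pdefault()
      op_values[pname] = pdefault
    if callable(pcheck) and not pcheck(op_values[pname]):
      raise errors.OpPrereqError("Parameter '%s' fails its check" % pname,
                                 errors.ECODE_INVAL)
  return op_values
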
class LUNodeEvacuationStrategy(NoHooksLU):
8368
  """Computes the node evacuation strategy.
8369

8370
  """
8371
  _OP_PARAMS = [
8372
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8373
    ("remote_node", None, _TMaybeString),
8374
    ("iallocator", None, _TMaybeString),
8375
    ]
8376
  REQ_BGL = False
8377

    
8378
  def CheckArguments(self):
8379
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8380

    
8381
  def ExpandNames(self):
8382
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8383
    self.needed_locks = locks = {}
8384
    if self.op.remote_node is None:
8385
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8386
    else:
8387
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8388
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8389

    
8390
  def Exec(self, feedback_fn):
8391
    if self.op.remote_node is not None:
8392
      instances = []
8393
      for node in self.op.nodes:
8394
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8395
      result = []
8396
      for i in instances:
8397
        if i.primary_node == self.op.remote_node:
8398
          raise errors.OpPrereqError("Node %s is the primary node of"
8399
                                     " instance %s, cannot use it as"
8400
                                     " secondary" %
8401
                                     (self.op.remote_node, i.name),
8402
                                     errors.ECODE_INVAL)
8403
        result.append([i.name, self.op.remote_node])
8404
    else:
8405
      ial = IAllocator(self.cfg, self.rpc,
8406
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8407
                       evac_nodes=self.op.nodes)
8408
      ial.Run(self.op.iallocator, validate=True)
8409
      if not ial.success:
8410
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8411
                                 errors.ECODE_NORES)
8412
      result = ial.result
8413
    return result

    
8415

    
8416
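# Editor's note: illustrative sketch, not part of the original module; the
# helper name is invented. In the remote_node branch above the LU returns a
# list of [instance_name, new_secondary] pairs (the iallocator branch returns
# whatever the MEVAC allocator produced). Grouping such pairs per target node
# could look like this:
def _SketchGroupEvacPairs(evac_pairs):
  """Groups [instance, node] pairs by target node (illustrative sketch).

  """
  per_node = {}
  for (instance_name, node_name) in evac_pairs:
    per_node.setdefault(node_name, []).append(instance_name)
  return per_node
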
class LUGrowDisk(LogicalUnit):
8417
  """Grow a disk of an instance.
8418

8419
  """
8420
  HPATH = "disk-grow"
8421
  HTYPE = constants.HTYPE_INSTANCE
8422
  _OP_PARAMS = [
8423
    _PInstanceName,
8424
    ("disk", _NoDefault, _TInt),
8425
    ("amount", _NoDefault, _TInt),
8426
    ("wait_for_sync", True, _TBool),
8427
    ]
8428
  REQ_BGL = False
8429

    
8430
  def ExpandNames(self):
8431
    self._ExpandAndLockInstance()
8432
    self.needed_locks[locking.LEVEL_NODE] = []
8433
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8434

    
8435
  def DeclareLocks(self, level):
8436
    if level == locking.LEVEL_NODE:
8437
      self._LockInstancesNodes()
8438

    
8439
  def BuildHooksEnv(self):
8440
    """Build hooks env.
8441

8442
    This runs on the master, the primary and all the secondaries.
8443

8444
    """
8445
    env = {
8446
      "DISK": self.op.disk,
8447
      "AMOUNT": self.op.amount,
8448
      }
8449
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8450
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8451
    return env, nl, nl
8452

    
8453
  def CheckPrereq(self):
8454
    """Check prerequisites.
8455

8456
    This checks that the instance is in the cluster.
8457

8458
    """
8459
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8460
    assert instance is not None, \
8461
      "Cannot retrieve locked instance %s" % self.op.instance_name
8462
    nodenames = list(instance.all_nodes)
8463
    for node in nodenames:
8464
      _CheckNodeOnline(self, node)
8465

    
8466
    self.instance = instance
8467

    
8468
    if instance.disk_template not in constants.DTS_GROWABLE:
8469
      raise errors.OpPrereqError("Instance's disk layout does not support"
8470
                                 " growing.", errors.ECODE_INVAL)
8471

    
8472
    self.disk = instance.FindDisk(self.op.disk)
8473

    
8474
    if instance.disk_template != constants.DT_FILE:
8475
      # TODO: check the free disk space for file, when that feature will be
8476
      # supported
8477
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8478

    
8479
  def Exec(self, feedback_fn):
8480
    """Execute disk grow.
8481

8482
    """
8483
    instance = self.instance
8484
    disk = self.disk
8485

    
8486
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8487
    if not disks_ok:
8488
      raise errors.OpExecError("Cannot activate block device to grow")
8489

    
8490
    for node in instance.all_nodes:
8491
      self.cfg.SetDiskID(disk, node)
8492
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8493
      result.Raise("Grow request failed to node %s" % node)
8494

    
8495
      # TODO: Rewrite code to work properly
8496
      # DRBD goes into sync mode for a short amount of time after executing the
8497
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8498
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8499
      # time is a work-around.
8500
      time.sleep(5)
8501

    
8502
    disk.RecordGrow(self.op.amount)
8503
    self.cfg.Update(instance, feedback_fn)
8504
    if self.op.wait_for_sync:
8505
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8506
      if disk_abort:
8507
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8508
                             " status.\nPlease check the instance.")
8509
      if not instance.admin_up:
8510
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8511
    elif not instance.admin_up:
8512
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8513
                           " not supposed to be running because no wait for"
8514
                           " sync mode was requested.")
8515

    
8516

    
8517
class LUQueryInstanceData(NoHooksLU):
8518
  """Query runtime instance data.
8519

8520
  """
8521
  _OP_PARAMS = [
8522
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8523
    ("static", False, _TBool),
8524
    ]
8525
  REQ_BGL = False
8526

    
8527
  def ExpandNames(self):
8528
    self.needed_locks = {}
8529
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8530

    
8531
    if self.op.instances:
8532
      self.wanted_names = []
8533
      for name in self.op.instances:
8534
        full_name = _ExpandInstanceName(self.cfg, name)
8535
        self.wanted_names.append(full_name)
8536
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8537
    else:
8538
      self.wanted_names = None
8539
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8540

    
8541
    self.needed_locks[locking.LEVEL_NODE] = []
8542
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8543

    
8544
  def DeclareLocks(self, level):
8545
    if level == locking.LEVEL_NODE:
8546
      self._LockInstancesNodes()
8547

    
8548
  def CheckPrereq(self):
8549
    """Check prerequisites.
8550

8551
    This only checks the optional instance list against the existing names.
8552

8553
    """
8554
    if self.wanted_names is None:
8555
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8556

    
8557
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8558
                             in self.wanted_names]
8559

    
8560
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8561
    """Returns the status of a block device
8562

8563
    """
8564
    if self.op.static or not node:
8565
      return None
8566

    
8567
    self.cfg.SetDiskID(dev, node)
8568

    
8569
    result = self.rpc.call_blockdev_find(node, dev)
8570
    if result.offline:
8571
      return None
8572

    
8573
    result.Raise("Can't compute disk status for %s" % instance_name)
8574

    
8575
    status = result.payload
8576
    if status is None:
8577
      return None
8578

    
8579
    return (status.dev_path, status.major, status.minor,
8580
            status.sync_percent, status.estimated_time,
8581
            status.is_degraded, status.ldisk_status)
8582

    
8583
  def _ComputeDiskStatus(self, instance, snode, dev):
8584
    """Compute block device status.
8585

8586
    """
8587
    if dev.dev_type in constants.LDS_DRBD:
8588
      # we change the snode then (otherwise we use the one passed in)
8589
      if dev.logical_id[0] == instance.primary_node:
8590
        snode = dev.logical_id[1]
8591
      else:
8592
        snode = dev.logical_id[0]
8593

    
8594
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8595
                                              instance.name, dev)
8596
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8597

    
8598
    if dev.children:
8599
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8600
                      for child in dev.children]
8601
    else:
8602
      dev_children = []
8603

    
8604
    data = {
8605
      "iv_name": dev.iv_name,
8606
      "dev_type": dev.dev_type,
8607
      "logical_id": dev.logical_id,
8608
      "physical_id": dev.physical_id,
8609
      "pstatus": dev_pstatus,
8610
      "sstatus": dev_sstatus,
8611
      "children": dev_children,
8612
      "mode": dev.mode,
8613
      "size": dev.size,
8614
      }
8615

    
8616
    return data
8617

    
8618
  def Exec(self, feedback_fn):
8619
    """Gather and return data"""
8620
    result = {}
8621

    
8622
    cluster = self.cfg.GetClusterInfo()
8623

    
8624
    for instance in self.wanted_instances:
8625
      if not self.op.static:
8626
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8627
                                                  instance.name,
8628
                                                  instance.hypervisor)
8629
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8630
        remote_info = remote_info.payload
8631
        if remote_info and "state" in remote_info:
8632
          remote_state = "up"
8633
        else:
8634
          remote_state = "down"
8635
      else:
8636
        remote_state = None
8637
      if instance.admin_up:
8638
        config_state = "up"
8639
      else:
8640
        config_state = "down"
8641

    
8642
      disks = [self._ComputeDiskStatus(instance, None, device)
8643
               for device in instance.disks]
8644

    
8645
      idict = {
8646
        "name": instance.name,
8647
        "config_state": config_state,
8648
        "run_state": remote_state,
8649
        "pnode": instance.primary_node,
8650
        "snodes": instance.secondary_nodes,
8651
        "os": instance.os,
8652
        # this happens to be the same format used for hooks
8653
        "nics": _NICListToTuple(self, instance.nics),
8654
        "disk_template": instance.disk_template,
8655
        "disks": disks,
8656
        "hypervisor": instance.hypervisor,
8657
        "network_port": instance.network_port,
8658
        "hv_instance": instance.hvparams,
8659
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8660
        "be_instance": instance.beparams,
8661
        "be_actual": cluster.FillBE(instance),
8662
        "os_instance": instance.osparams,
8663
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8664
        "serial_no": instance.serial_no,
8665
        "mtime": instance.mtime,
8666
        "ctime": instance.ctime,
8667
        "uuid": instance.uuid,
8668
        }
8669

    
8670
      result[instance.name] = idict
8671

    
8672
    return result

    
8674

    
8675
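# Editor's note: illustrative sketch, not part of the original module; the
# helper name is invented. LUQueryInstanceData above returns a dict keyed by
# instance name whose "disks" entries are trees of status dicts; their
# "pstatus"/"sstatus" fields are the tuples built by _ComputeBlockdevStatus
# (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status), or None for static queries and offline nodes. A consumer
# could extract per-disk sync percentages like this:
def _SketchSyncSummary(instance_data):
  """Maps disk iv_names to their primary-side sync percentage (sketch).

  """
  summary = {}
  for disk in instance_data["disks"]:
    pstatus = disk["pstatus"]
    if pstatus is None:
      # only static data was requested or the node could not be queried
      summary[disk["iv_name"]] = None
    else:
      # sync_percent is the fourth element of the status tuple
      summary[disk["iv_name"]] = pstatus[3]
  return summary
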
class LUSetInstanceParams(LogicalUnit):
8676
  """Modifies an instances's parameters.
8677

8678
  """
8679
  HPATH = "instance-modify"
8680
  HTYPE = constants.HTYPE_INSTANCE
8681
  _OP_PARAMS = [
8682
    _PInstanceName,
8683
    ("nics", _EmptyList, _TList),
8684
    ("disks", _EmptyList, _TList),
8685
    ("beparams", _EmptyDict, _TDict),
8686
    ("hvparams", _EmptyDict, _TDict),
8687
    ("disk_template", None, _TMaybeString),
8688
    ("remote_node", None, _TMaybeString),
8689
    ("os_name", None, _TMaybeString),
8690
    ("force_variant", False, _TBool),
8691
    ("osparams", None, _TOr(_TDict, _TNone)),
8692
    _PForce,
8693
    ]
8694
  REQ_BGL = False
8695

    
8696
  def CheckArguments(self):
8697
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8698
            self.op.hvparams or self.op.beparams or self.op.os_name):
8699
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8700

    
8701
    if self.op.hvparams:
8702
      _CheckGlobalHvParams(self.op.hvparams)
8703

    
8704
    # Disk validation
8705
    disk_addremove = 0
8706
    for disk_op, disk_dict in self.op.disks:
8707
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8708
      if disk_op == constants.DDM_REMOVE:
8709
        disk_addremove += 1
8710
        continue
8711
      elif disk_op == constants.DDM_ADD:
8712
        disk_addremove += 1
8713
      else:
8714
        if not isinstance(disk_op, int):
8715
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8716
        if not isinstance(disk_dict, dict):
8717
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8718
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8719

    
8720
      if disk_op == constants.DDM_ADD:
8721
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8722
        if mode not in constants.DISK_ACCESS_SET:
8723
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8724
                                     errors.ECODE_INVAL)
8725
        size = disk_dict.get('size', None)
8726
        if size is None:
8727
          raise errors.OpPrereqError("Required disk parameter size missing",
8728
                                     errors.ECODE_INVAL)
8729
        try:
8730
          size = int(size)
8731
        except (TypeError, ValueError), err:
8732
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8733
                                     str(err), errors.ECODE_INVAL)
8734
        disk_dict['size'] = size
8735
      else:
8736
        # modification of disk
8737
        if 'size' in disk_dict:
8738
          raise errors.OpPrereqError("Disk size change not possible, use"
8739
                                     " grow-disk", errors.ECODE_INVAL)
8740

    
8741
    if disk_addremove > 1:
8742
      raise errors.OpPrereqError("Only one disk add or remove operation"
8743
                                 " supported at a time", errors.ECODE_INVAL)
8744

    
8745
    if self.op.disks and self.op.disk_template is not None:
8746
      raise errors.OpPrereqError("Disk template conversion and other disk"
8747
                                 " changes not supported at the same time",
8748
                                 errors.ECODE_INVAL)
8749

    
8750
    if self.op.disk_template:
8751
      _CheckDiskTemplate(self.op.disk_template)
8752
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8753
          self.op.remote_node is None):
8754
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8755
                                   " one requires specifying a secondary node",
8756
                                   errors.ECODE_INVAL)
8757

    
8758
    # NIC validation
8759
    nic_addremove = 0
8760
    for nic_op, nic_dict in self.op.nics:
8761
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8762
      if nic_op == constants.DDM_REMOVE:
8763
        nic_addremove += 1
8764
        continue
8765
      elif nic_op == constants.DDM_ADD:
8766
        nic_addremove += 1
8767
      else:
8768
        if not isinstance(nic_op, int):
8769
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8770
        if not isinstance(nic_dict, dict):
8771
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8772
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8773

    
8774
      # nic_dict should be a dict
8775
      nic_ip = nic_dict.get('ip', None)
8776
      if nic_ip is not None:
8777
        if nic_ip.lower() == constants.VALUE_NONE:
8778
          nic_dict['ip'] = None
8779
        else:
8780
          if not netutils.IPAddress.IsValid(nic_ip):
8781
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8782
                                       errors.ECODE_INVAL)
8783

    
8784
      nic_bridge = nic_dict.get('bridge', None)
8785
      nic_link = nic_dict.get('link', None)
8786
      if nic_bridge and nic_link:
8787
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8788
                                   " at the same time", errors.ECODE_INVAL)
8789
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8790
        nic_dict['bridge'] = None
8791
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8792
        nic_dict['link'] = None
8793

    
8794
      if nic_op == constants.DDM_ADD:
8795
        nic_mac = nic_dict.get('mac', None)
8796
        if nic_mac is None:
8797
          nic_dict['mac'] = constants.VALUE_AUTO
8798

    
8799
      if 'mac' in nic_dict:
8800
        nic_mac = nic_dict['mac']
8801
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8802
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8803

    
8804
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8805
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8806
                                     " modifying an existing nic",
8807
                                     errors.ECODE_INVAL)
8808

    
8809
    if nic_addremove > 1:
8810
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8811
                                 " supported at a time", errors.ECODE_INVAL)
8812

    
8813
  def ExpandNames(self):
8814
    self._ExpandAndLockInstance()
8815
    self.needed_locks[locking.LEVEL_NODE] = []
8816
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8817

    
8818
  def DeclareLocks(self, level):
8819
    if level == locking.LEVEL_NODE:
8820
      self._LockInstancesNodes()
8821
      if self.op.disk_template and self.op.remote_node:
8822
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8823
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8824

    
8825
  def BuildHooksEnv(self):
8826
    """Build hooks env.
8827

8828
    This runs on the master, primary and secondaries.
8829

8830
    """
8831
    args = dict()
8832
    if constants.BE_MEMORY in self.be_new:
8833
      args['memory'] = self.be_new[constants.BE_MEMORY]
8834
    if constants.BE_VCPUS in self.be_new:
8835
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8836
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8837
    # information at all.
8838
    if self.op.nics:
8839
      args['nics'] = []
8840
      nic_override = dict(self.op.nics)
8841
      for idx, nic in enumerate(self.instance.nics):
8842
        if idx in nic_override:
8843
          this_nic_override = nic_override[idx]
8844
        else:
8845
          this_nic_override = {}
8846
        if 'ip' in this_nic_override:
8847
          ip = this_nic_override['ip']
8848
        else:
8849
          ip = nic.ip
8850
        if 'mac' in this_nic_override:
8851
          mac = this_nic_override['mac']
8852
        else:
8853
          mac = nic.mac
8854
        if idx in self.nic_pnew:
8855
          nicparams = self.nic_pnew[idx]
8856
        else:
8857
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8858
        mode = nicparams[constants.NIC_MODE]
8859
        link = nicparams[constants.NIC_LINK]
8860
        args['nics'].append((ip, mac, mode, link))
8861
      if constants.DDM_ADD in nic_override:
8862
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8863
        mac = nic_override[constants.DDM_ADD]['mac']
8864
        nicparams = self.nic_pnew[constants.DDM_ADD]
8865
        mode = nicparams[constants.NIC_MODE]
8866
        link = nicparams[constants.NIC_LINK]
8867
        args['nics'].append((ip, mac, mode, link))
8868
      elif constants.DDM_REMOVE in nic_override:
8869
        del args['nics'][-1]
8870

    
8871
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8872
    if self.op.disk_template:
8873
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8874
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8875
    return env, nl, nl
8876

    
8877
  def CheckPrereq(self):
8878
    """Check prerequisites.
8879

8880
    This only checks the instance list against the existing names.
8881

8882
    """
8883
    # checking the new params on the primary/secondary nodes
8884

    
8885
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8886
    cluster = self.cluster = self.cfg.GetClusterInfo()
8887
    assert self.instance is not None, \
8888
      "Cannot retrieve locked instance %s" % self.op.instance_name
8889
    pnode = instance.primary_node
8890
    nodelist = list(instance.all_nodes)
8891

    
8892
    # OS change
8893
    if self.op.os_name and not self.op.force:
8894
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8895
                      self.op.force_variant)
8896
      instance_os = self.op.os_name
8897
    else:
8898
      instance_os = instance.os
8899

    
8900
    if self.op.disk_template:
8901
      if instance.disk_template == self.op.disk_template:
8902
        raise errors.OpPrereqError("Instance already has disk template %s" %
8903
                                   instance.disk_template, errors.ECODE_INVAL)
8904

    
8905
      if (instance.disk_template,
8906
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8907
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8908
                                   " %s to %s" % (instance.disk_template,
8909
                                                  self.op.disk_template),
8910
                                   errors.ECODE_INVAL)
8911
      _CheckInstanceDown(self, instance, "cannot change disk template")
8912
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8913
        if self.op.remote_node == pnode:
8914
          raise errors.OpPrereqError("Given new secondary node %s is the same"
8915
                                     " as the primary node of the instance" %
8916
                                     self.op.remote_node, errors.ECODE_STATE)
8917
        _CheckNodeOnline(self, self.op.remote_node)
8918
        _CheckNodeNotDrained(self, self.op.remote_node)
8919
        disks = [{"size": d.size} for d in instance.disks]
8920
        required = _ComputeDiskSize(self.op.disk_template, disks)
8921
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8922

    
8923
    # hvparams processing
8924
    if self.op.hvparams:
8925
      hv_type = instance.hypervisor
8926
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8927
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8928
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8929

    
8930
      # local check
8931
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8932
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8933
      self.hv_new = hv_new # the new actual values
8934
      self.hv_inst = i_hvdict # the new dict (without defaults)
8935
    else:
8936
      self.hv_new = self.hv_inst = {}
8937

    
8938
    # beparams processing
8939
    if self.op.beparams:
8940
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8941
                                   use_none=True)
8942
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8943
      be_new = cluster.SimpleFillBE(i_bedict)
8944
      self.be_new = be_new # the new actual values
8945
      self.be_inst = i_bedict # the new dict (without defaults)
8946
    else:
8947
      self.be_new = self.be_inst = {}
8948

    
8949
    # osparams processing
8950
    if self.op.osparams:
8951
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8952
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8953
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8954
      self.os_inst = i_osdict # the new dict (without defaults)
8955
    else:
8956
      self.os_new = self.os_inst = {}
8957

    
8958
    self.warn = []
8959

    
8960
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8961
      mem_check_list = [pnode]
8962
      if be_new[constants.BE_AUTO_BALANCE]:
8963
        # either we changed auto_balance to yes or it was from before
8964
        mem_check_list.extend(instance.secondary_nodes)
8965
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8966
                                                  instance.hypervisor)
8967
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8968
                                         instance.hypervisor)
8969
      pninfo = nodeinfo[pnode]
8970
      msg = pninfo.fail_msg
8971
      if msg:
8972
        # Assume the primary node is unreachable and go ahead
8973
        self.warn.append("Can't get info from primary node %s: %s" %
8974
                         (pnode,  msg))
8975
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8976
        self.warn.append("Node data from primary node %s doesn't contain"
8977
                         " free memory information" % pnode)
8978
      elif instance_info.fail_msg:
8979
        self.warn.append("Can't get instance runtime information: %s" %
8980
                        instance_info.fail_msg)
8981
      else:
8982
        if instance_info.payload:
8983
          current_mem = int(instance_info.payload['memory'])
8984
        else:
8985
          # Assume instance not running
8986
          # (there is a slight race condition here, but it's not very probable,
8987
          # and we have no other way to check)
8988
          current_mem = 0
8989
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8990
                    pninfo.payload['memory_free'])
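        # Editor's note (illustrative example, not original code): requesting
        # 2048 MB of memory while the instance currently uses 512 MB and the
        # primary node reports 1024 MB free gives
        # miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is refused.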
8991
        if miss_mem > 0:
8992
          raise errors.OpPrereqError("This change will prevent the instance"
8993
                                     " from starting, due to %d MB of memory"
8994
                                     " missing on its primary node" % miss_mem,
8995
                                     errors.ECODE_NORES)
8996

    
8997
      if be_new[constants.BE_AUTO_BALANCE]:
8998
        for node, nres in nodeinfo.items():
8999
          if node not in instance.secondary_nodes:
9000
            continue
9001
          msg = nres.fail_msg
9002
          if msg:
9003
            self.warn.append("Can't get info from secondary node %s: %s" %
9004
                             (node, msg))
9005
          elif not isinstance(nres.payload.get('memory_free', None), int):
9006
            self.warn.append("Secondary node %s didn't return free"
9007
                             " memory information" % node)
9008
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9009
            self.warn.append("Not enough memory to failover instance to"
9010
                             " secondary node %s" % node)
9011

    
9012
    # NIC processing
9013
    self.nic_pnew = {}
9014
    self.nic_pinst = {}
9015
    for nic_op, nic_dict in self.op.nics:
9016
      if nic_op == constants.DDM_REMOVE:
9017
        if not instance.nics:
9018
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9019
                                     errors.ECODE_INVAL)
9020
        continue
9021
      if nic_op != constants.DDM_ADD:
9022
        # an existing nic
9023
        if not instance.nics:
9024
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9025
                                     " no NICs" % nic_op,
9026
                                     errors.ECODE_INVAL)
9027
        if nic_op < 0 or nic_op >= len(instance.nics):
9028
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9029
                                     " are 0 to %d" %
9030
                                     (nic_op, len(instance.nics) - 1),
9031
                                     errors.ECODE_INVAL)
9032
        old_nic_params = instance.nics[nic_op].nicparams
9033
        old_nic_ip = instance.nics[nic_op].ip
9034
      else:
9035
        old_nic_params = {}
9036
        old_nic_ip = None
9037

    
9038
      update_params_dict = dict([(key, nic_dict[key])
9039
                                 for key in constants.NICS_PARAMETERS
9040
                                 if key in nic_dict])
9041

    
9042
      if 'bridge' in nic_dict:
9043
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9044

    
9045
      new_nic_params = _GetUpdatedParams(old_nic_params,
9046
                                         update_params_dict)
9047
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9048
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9049
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9050
      self.nic_pinst[nic_op] = new_nic_params
9051
      self.nic_pnew[nic_op] = new_filled_nic_params
9052
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9053

    
9054
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9055
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9056
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9057
        if msg:
9058
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9059
          if self.op.force:
9060
            self.warn.append(msg)
9061
          else:
9062
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9063
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9064
        if 'ip' in nic_dict:
9065
          nic_ip = nic_dict['ip']
9066
        else:
9067
          nic_ip = old_nic_ip
9068
        if nic_ip is None:
9069
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9070
                                     ' on a routed nic', errors.ECODE_INVAL)
9071
      if 'mac' in nic_dict:
9072
        nic_mac = nic_dict['mac']
9073
        if nic_mac is None:
9074
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9075
                                     errors.ECODE_INVAL)
9076
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9077
          # otherwise generate the mac
9078
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9079
        else:
9080
          # or validate/reserve the current one
9081
          try:
9082
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9083
          except errors.ReservationError:
9084
            raise errors.OpPrereqError("MAC address %s already in use"
9085
                                       " in cluster" % nic_mac,
9086
                                       errors.ECODE_NOTUNIQUE)
9087

    
9088
    # DISK processing
9089
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9090
      raise errors.OpPrereqError("Disk operations not supported for"
9091
                                 " diskless instances",
9092
                                 errors.ECODE_INVAL)
9093
    for disk_op, _ in self.op.disks:
9094
      if disk_op == constants.DDM_REMOVE:
9095
        if len(instance.disks) == 1:
9096
          raise errors.OpPrereqError("Cannot remove the last disk of"
9097
                                     " an instance", errors.ECODE_INVAL)
9098
        _CheckInstanceDown(self, instance, "cannot remove disks")
9099

    
9100
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
9102
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9103
                                   " add more" % constants.MAX_DISKS,
9104
                                   errors.ECODE_STATE)
9105
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9106
        # an existing disk
9107
        if disk_op < 0 or disk_op >= len(instance.disks):
9108
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9109
                                     " are 0 to %d" %
9110
                                     (disk_op, len(instance.disks)),
9111
                                     errors.ECODE_INVAL)
9112

    
9113
    return
9114

    
9115
  def _ConvertPlainToDrbd(self, feedback_fn):
9116
    """Converts an instance from plain to drbd.
9117

9118
    """
9119
    feedback_fn("Converting template to drbd")
9120
    instance = self.instance
9121
    pnode = instance.primary_node
9122
    snode = self.op.remote_node
9123

    
9124
    # create a fake disk info for _GenerateDiskTemplate
9125
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9126
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9127
                                      instance.name, pnode, [snode],
9128
                                      disk_info, None, None, 0)
9129
    info = _GetInstanceInfoText(instance)
9130
    feedback_fn("Creating aditional volumes...")
9131
    # first, create the missing data and meta devices
9132
    for disk in new_disks:
9133
      # unfortunately this is... not too nice
9134
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9135
                            info, True)
9136
      for child in disk.children:
9137
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9138
    # at this stage, all new LVs have been created, we can rename the
9139
    # old ones
9140
    feedback_fn("Renaming original volumes...")
9141
    rename_list = [(o, n.children[0].logical_id)
9142
                   for (o, n) in zip(instance.disks, new_disks)]
9143
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9144
    result.Raise("Failed to rename original LVs")
9145

    
9146
    feedback_fn("Initializing DRBD devices...")
9147
    # all child devices are in place, we can now create the DRBD devices
9148
    for disk in new_disks:
9149
      for node in [pnode, snode]:
9150
        f_create = node == pnode
9151
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9152

    
9153
    # at this point, the instance has been modified
9154
    instance.disk_template = constants.DT_DRBD8
9155
    instance.disks = new_disks
9156
    self.cfg.Update(instance, feedback_fn)
9157

    
9158
    # disks are created, waiting for sync
9159
    disk_abort = not _WaitForSync(self, instance)
9160
    if disk_abort:
9161
      raise errors.OpExecError("There are some degraded disks for"
9162
                               " this instance, please cleanup manually")
9163

    
9164
  def _ConvertDrbdToPlain(self, feedback_fn):
9165
    """Converts an instance from drbd to plain.
9166

9167
    """
9168
    instance = self.instance
9169
    assert len(instance.secondary_nodes) == 1
9170
    pnode = instance.primary_node
9171
    snode = instance.secondary_nodes[0]
9172
    feedback_fn("Converting template to plain")
9173

    
9174
    old_disks = instance.disks
9175
    new_disks = [d.children[0] for d in old_disks]
9176

    
9177
    # copy over size and mode
9178
    for parent, child in zip(old_disks, new_disks):
9179
      child.size = parent.size
9180
      child.mode = parent.mode
9181

    
9182
    # update instance structure
9183
    instance.disks = new_disks
9184
    instance.disk_template = constants.DT_PLAIN
9185
    self.cfg.Update(instance, feedback_fn)
9186

    
9187
    feedback_fn("Removing volumes on the secondary node...")
9188
    for disk in old_disks:
9189
      self.cfg.SetDiskID(disk, snode)
9190
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9191
      if msg:
9192
        self.LogWarning("Could not remove block device %s on node %s,"
9193
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9194

    
9195
    feedback_fn("Removing unneeded volumes on the primary node...")
9196
    for idx, disk in enumerate(old_disks):
9197
      meta = disk.children[1]
9198
      self.cfg.SetDiskID(meta, pnode)
9199
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9200
      if msg:
9201
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9202
                        " continuing anyway: %s", idx, pnode, msg)
9203

    
9204

    
9205
  def Exec(self, feedback_fn):
9206
    """Modifies an instance.
9207

9208
    All parameters take effect only at the next restart of the instance.
9209

9210
    """
9211
    # Process here the warnings from CheckPrereq, as we don't have a
9212
    # feedback_fn there.
9213
    for warn in self.warn:
9214
      feedback_fn("WARNING: %s" % warn)
9215

    
9216
    result = []
9217
    instance = self.instance
9218
    # disk changes
9219
    for disk_op, disk_dict in self.op.disks:
9220
      if disk_op == constants.DDM_REMOVE:
9221
        # remove the last disk
9222
        device = instance.disks.pop()
9223
        device_idx = len(instance.disks)
9224
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9225
          self.cfg.SetDiskID(disk, node)
9226
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9227
          if msg:
9228
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9229
                            " continuing anyway", device_idx, node, msg)
9230
        result.append(("disk/%d" % device_idx, "remove"))
9231
      elif disk_op == constants.DDM_ADD:
9232
        # add a new disk
9233
        if instance.disk_template == constants.DT_FILE:
9234
          file_driver, file_path = instance.disks[0].logical_id
9235
          file_path = os.path.dirname(file_path)
9236
        else:
9237
          file_driver = file_path = None
9238
        disk_idx_base = len(instance.disks)
9239
        new_disk = _GenerateDiskTemplate(self,
9240
                                         instance.disk_template,
9241
                                         instance.name, instance.primary_node,
9242
                                         instance.secondary_nodes,
9243
                                         [disk_dict],
9244
                                         file_path,
9245
                                         file_driver,
9246
                                         disk_idx_base)[0]
9247
        instance.disks.append(new_disk)
9248
        info = _GetInstanceInfoText(instance)
9249

    
9250
        logging.info("Creating volume %s for instance %s",
9251
                     new_disk.iv_name, instance.name)
9252
        # Note: this needs to be kept in sync with _CreateDisks
9253
        #HARDCODE
9254
        for node in instance.all_nodes:
9255
          f_create = node == instance.primary_node
9256
          try:
9257
            _CreateBlockDev(self, node, instance, new_disk,
9258
                            f_create, info, f_create)
9259
          except errors.OpExecError, err:
9260
            self.LogWarning("Failed to create volume %s (%s) on"
9261
                            " node %s: %s",
9262
                            new_disk.iv_name, new_disk, node, err)
9263
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9264
                       (new_disk.size, new_disk.mode)))
9265
      else:
9266
        # change a given disk
9267
        instance.disks[disk_op].mode = disk_dict['mode']
9268
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9269

    
9270
    if self.op.disk_template:
9271
      r_shut = _ShutdownInstanceDisks(self, instance)
9272
      if not r_shut:
9273
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9274
                                 " proceed with disk template conversion")
9275
      mode = (instance.disk_template, self.op.disk_template)
9276
      try:
9277
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9278
      except:
9279
        self.cfg.ReleaseDRBDMinors(instance.name)
9280
        raise
9281
      result.append(("disk_template", self.op.disk_template))
9282

    
9283
    # NIC changes
9284
    for nic_op, nic_dict in self.op.nics:
9285
      if nic_op == constants.DDM_REMOVE:
9286
        # remove the last nic
9287
        del instance.nics[-1]
9288
        result.append(("nic.%d" % len(instance.nics), "remove"))
9289
      elif nic_op == constants.DDM_ADD:
9290
        # mac and bridge should be set, by now
9291
        mac = nic_dict['mac']
9292
        ip = nic_dict.get('ip', None)
9293
        nicparams = self.nic_pinst[constants.DDM_ADD]
9294
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9295
        instance.nics.append(new_nic)
9296
        result.append(("nic.%d" % (len(instance.nics) - 1),
9297
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9298
                       (new_nic.mac, new_nic.ip,
9299
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9300
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9301
                       )))
9302
      else:
9303
        for key in 'mac', 'ip':
9304
          if key in nic_dict:
9305
            setattr(instance.nics[nic_op], key, nic_dict[key])
9306
        if nic_op in self.nic_pinst:
9307
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9308
        for key, val in nic_dict.iteritems():
9309
          result.append(("nic.%s/%d" % (key, nic_op), val))
9310

    
9311
    # hvparams changes
9312
    if self.op.hvparams:
9313
      instance.hvparams = self.hv_inst
9314
      for key, val in self.op.hvparams.iteritems():
9315
        result.append(("hv/%s" % key, val))
9316

    
9317
    # beparams changes
9318
    if self.op.beparams:
9319
      instance.beparams = self.be_inst
9320
      for key, val in self.op.beparams.iteritems():
9321
        result.append(("be/%s" % key, val))
9322

    
9323
    # OS change
9324
    if self.op.os_name:
9325
      instance.os = self.op.os_name
9326

    
9327
    # osparams changes
9328
    if self.op.osparams:
9329
      instance.osparams = self.os_inst
9330
      for key, val in self.op.osparams.iteritems():
9331
        result.append(("os/%s" % key, val))
9332

    
9333
    self.cfg.Update(instance, feedback_fn)
9334

    
9335
    return result
9336

    
9337
  _DISK_CONVERSIONS = {
9338
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9339
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9340
    }

    
9342

    
9343
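# Editor's note: illustrative sketch, not part of the original module. The
# "disks" and "nics" parameters of LUSetInstanceParams above are lists of
# (operation, dict) pairs, where the operation is constants.DDM_ADD,
# constants.DDM_REMOVE or the integer index of an existing device, as
# enforced in CheckArguments. The literal values below (size, link name) are
# made up purely to show the expected shape.
_SKETCH_DISK_MODS = [
  (constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR}),
  ]
_SKETCH_NIC_MODS = [
  (0, {"link": "br-example"}),
  ]
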
class LUQueryExports(NoHooksLU):
9344
  """Query the exports list
9345

9346
  """
9347
  _OP_PARAMS = [
9348
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9349
    ("use_locking", False, _TBool),
9350
    ]
9351
  REQ_BGL = False
9352

    
9353
  def ExpandNames(self):
9354
    self.needed_locks = {}
9355
    self.share_locks[locking.LEVEL_NODE] = 1
9356
    if not self.op.nodes:
9357
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9358
    else:
9359
      self.needed_locks[locking.LEVEL_NODE] = \
9360
        _GetWantedNodes(self, self.op.nodes)
9361

    
9362
  def Exec(self, feedback_fn):
9363
    """Compute the list of all the exported system images.
9364

9365
    @rtype: dict
9366
    @return: a dictionary with the structure node->(export-list)
9367
        where export-list is a list of the instances exported on
9368
        that node.
9369

9370
    """
9371
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9372
    rpcresult = self.rpc.call_export_list(self.nodes)
9373
    result = {}
9374
    for node in rpcresult:
9375
      if rpcresult[node].fail_msg:
9376
        result[node] = False
9377
      else:
9378
        result[node] = rpcresult[node].payload
9379

    
9380
    return result

    
9382

    
9383
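# Editor's note: illustrative sketch, not part of the original module; the
# helper name is invented. As documented in the Exec docstring above, the
# result maps each node name either to its export list or to False when the
# RPC failed. Finding the nodes that hold an export of a given instance could
# look like this:
def _SketchNodesWithExport(export_map, instance_name):
  """Returns the nodes whose export list contains instance_name (sketch).

  """
  nodes = []
  for (node_name, exports) in export_map.items():
    # a value of False marks a node whose export list could not be fetched
    if exports and instance_name in exports:
      nodes.append(node_name)
  return nodes
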
class LUPrepareExport(NoHooksLU):
9384
  """Prepares an instance for an export and returns useful information.
9385

9386
  """
9387
  _OP_PARAMS = [
9388
    _PInstanceName,
9389
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9390
    ]
9391
  REQ_BGL = False
9392

    
9393
  def ExpandNames(self):
9394
    self._ExpandAndLockInstance()
9395

    
9396
  def CheckPrereq(self):
9397
    """Check prerequisites.
9398

9399
    """
9400
    instance_name = self.op.instance_name
9401

    
9402
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9403
    assert self.instance is not None, \
9404
          "Cannot retrieve locked instance %s" % self.op.instance_name
9405
    _CheckNodeOnline(self, self.instance.primary_node)
9406

    
9407
    self._cds = _GetClusterDomainSecret()
9408

    
9409
  def Exec(self, feedback_fn):
9410
    """Prepares an instance for an export.
9411

9412
    """
9413
    instance = self.instance
9414

    
9415
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9416
      salt = utils.GenerateSecret(8)
9417

    
9418
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9419
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9420
                                              constants.RIE_CERT_VALIDITY)
9421
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9422

    
9423
      (name, cert_pem) = result.payload
9424

    
9425
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9426
                                             cert_pem)
9427

    
9428
      return {
9429
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9430
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9431
                          salt),
9432
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9433
        }
9434

    
9435
    return None

    
9437

    
9438
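# Editor's note: illustrative sketch, not part of the original module; the
# helper name is invented. For remote exports the LU above returns an
# "x509_key_name" triple of (key_name, hmac, salt) signed with the cluster
# domain secret; LUExportInstance.CheckPrereq below verifies it with
# utils.VerifySha1Hmac. A stand-alone check over such a triple:
def _SketchCheckX509KeyName(cds, x509_key_name):
  """Verifies a (name, hmac, salt) triple against the given secret (sketch).

  """
  (key_name, hmac_digest, hmac_salt) = x509_key_name
  if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
    raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                               errors.ECODE_INVAL)
  return key_name
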
class LUExportInstance(LogicalUnit):
9439
  """Export an instance to an image in the cluster.
9440

9441
  """
9442
  HPATH = "instance-export"
9443
  HTYPE = constants.HTYPE_INSTANCE
9444
  _OP_PARAMS = [
9445
    _PInstanceName,
9446
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9447
    ("shutdown", True, _TBool),
9448
    _PShutdownTimeout,
9449
    ("remove_instance", False, _TBool),
9450
    ("ignore_remove_failures", False, _TBool),
9451
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9452
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9453
    ("destination_x509_ca", None, _TMaybeString),
9454
    ]
9455
  REQ_BGL = False
9456

    
9457
  def CheckArguments(self):
9458
    """Check the arguments.
9459

9460
    """
9461
    self.x509_key_name = self.op.x509_key_name
9462
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9463

    
9464
    if self.op.remove_instance and not self.op.shutdown:
9465
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9466
                                 " down before")
9467

    
9468
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9469
      if not self.x509_key_name:
9470
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9471
                                   errors.ECODE_INVAL)
9472

    
9473
      if not self.dest_x509_ca_pem:
9474
        raise errors.OpPrereqError("Missing destination X509 CA",
9475
                                   errors.ECODE_INVAL)
9476

    
9477
  def ExpandNames(self):
9478
    self._ExpandAndLockInstance()
9479

    
9480
    # Lock all nodes for local exports
9481
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9482
      # FIXME: lock only instance primary and destination node
9483
      #
9484
      # Sad but true, for now we have to lock all nodes, as we don't know where
9485
      # the previous export might be, and in this LU we search for it and
9486
      # remove it from its current node. In the future we could fix this by:
9487
      #  - making a tasklet to search (share-lock all), then create the
9488
      #    new one, then one to remove, after
9489
      #  - removing the removal operation altogether
9490
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9491

    
9492
  def DeclareLocks(self, level):
9493
    """Last minute lock declaration."""
9494
    # All nodes are locked anyway, so nothing to do here.
9495

    
9496
  def BuildHooksEnv(self):
9497
    """Build hooks env.
9498

9499
    This will run on the master, primary node and target node.
9500

9501
    """
9502
    env = {
9503
      "EXPORT_MODE": self.op.mode,
9504
      "EXPORT_NODE": self.op.target_node,
9505
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9506
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9507
      # TODO: Generic function for boolean env variables
9508
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9509
      }
9510

    
9511
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9512

    
9513
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9514

    
9515
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9516
      nl.append(self.op.target_node)
9517

    
9518
    return env, nl, nl
9519

    
9520
  def CheckPrereq(self):
9521
    """Check prerequisites.
9522

9523
    This checks that the instance and node names are valid.
9524

9525
    """
9526
    instance_name = self.op.instance_name
9527

    
9528
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9529
    assert self.instance is not None, \
9530
          "Cannot retrieve locked instance %s" % self.op.instance_name
9531
    _CheckNodeOnline(self, self.instance.primary_node)
9532

    
9533
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9534
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9535
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9536
      assert self.dst_node is not None
9537

    
9538
      _CheckNodeOnline(self, self.dst_node.name)
9539
      _CheckNodeNotDrained(self, self.dst_node.name)
9540

    
9541
      self._cds = None
9542
      self.dest_disk_info = None
9543
      self.dest_x509_ca = None
9544

    
9545
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9546
      self.dst_node = None
9547

    
9548
      if len(self.op.target_node) != len(self.instance.disks):
9549
        raise errors.OpPrereqError(("Received destination information for %s"
9550
                                    " disks, but instance %s has %s disks") %
9551
                                   (len(self.op.target_node), instance_name,
9552
                                    len(self.instance.disks)),
9553
                                   errors.ECODE_INVAL)
9554

    
9555
      cds = _GetClusterDomainSecret()
9556

    
9557
      # Check X509 key name
9558
      try:
9559
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9560
      except (TypeError, ValueError), err:
9561
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9562

    
9563
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9564
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9565
                                   errors.ECODE_INVAL)
9566

    
9567
      # Load and verify CA
9568
      try:
9569
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9570
      except OpenSSL.crypto.Error, err:
9571
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9572
                                   (err, ), errors.ECODE_INVAL)
9573

    
9574
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9575
      if errcode is not None:
9576
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9577
                                   (msg, ), errors.ECODE_INVAL)
9578

    
9579
      self.dest_x509_ca = cert
9580

    
9581
      # Verify target information
9582
      disk_info = []
9583
      for idx, disk_data in enumerate(self.op.target_node):
9584
        try:
9585
          (host, port, magic) = \
9586
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9587
        except errors.GenericError, err:
9588
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9589
                                     (idx, err), errors.ECODE_INVAL)
9590

    
9591
        disk_info.append((host, port, magic))
9592

    
9593
      assert len(disk_info) == len(self.op.target_node)
9594
      self.dest_disk_info = disk_info
9595

    
9596
    else:
9597
      raise errors.ProgrammerError("Unhandled export mode %r" %
9598
                                   self.op.mode)
9599

    
9600
    # instance disk type verification
9601
    # TODO: Implement export support for file-based disks
9602
    for disk in self.instance.disks:
9603
      if disk.dev_type == constants.LD_FILE:
9604
        raise errors.OpPrereqError("Export not supported for instances with"
9605
                                   " file-based disks", errors.ECODE_INVAL)
9606

    
9607
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

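  # Exec below drives the export: optionally shut the instance down, snapshot
  # its disks via masterd.instance.ExportInstanceHelper, run either the local
  # or the remote (X509-authenticated) export, restart the instance if it was
  # only stopped for the export, and finally remove the instance and/or clean
  # up old exports if requested.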
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point the export was successful; we can clean up and finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

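  # CheckPrereq resolves self.op.kind into self.target: the cluster
  # configuration object, a node, or an instance.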
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

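  # Exec returns a list of (path, tag) tuples; path is "/cluster",
  # "/instances/<name>" or "/nodes/<name>" depending on where the tag matched.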
  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

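  # A repeat of 0 runs the delay exactly once without per-iteration logging;
  # repeat == N runs it N times, logging iterations as "i/N-1" starting at 0.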
  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

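  # The handshake above: create a socket in a private temporary directory,
  # pass its path to the client through the callback, wait up to
  # _CLIENT_CONNECT_TIMEOUT seconds for the client to connect, then up to
  # _CLIENT_CONFIRM_TIMEOUT seconds for a one-byte confirmation before
  # closing the connection.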
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


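# A sketch of how the LUs drive IAllocator (placeholder names; the exact
# keyword arguments depend on the mode, see LUTestAllocator.Exec below for a
# complete example):
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name=instance_name,
#                    relocate_from=list(secondary_nodes))
#   ial.Run(allocator_name)
#   if not ial.success:
#     # ial.info carries the script's explanation, ial.result its output list
#     ...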
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

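  # The structure built above (and completed by _BuildInputData) is roughly:
  #   self.in_data = {
  #     "version": ..., "cluster_name": ..., "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {node_name: {static and dynamic resource info}},
  #     "instances": {instance_name: {tags, vcpus, memory, nics, disks, ...}},
  #     "request": <mode-specific dict added by _BuildInputData>,
  #     }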
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

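  # The external script is expected to print a JSON object of the form
  #   {"success": <bool>, "info": <string>, "result": <list>}
  # (older allocators used "nodes" instead of "result"; that is still
  # accepted below for backwards compatibility).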
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


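# LUTestAllocator exercises the IAllocator machinery: with direction "in" it
# only builds and returns the generated input text, with direction "out" it
# actually runs the named allocator script and returns its raw output.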
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result