#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs

def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


#: The without-default default value
_NoDefault = object()


#: The no-type (value too complex to check it in the type system)
_NoType = object()


# Some basic types
def _TNotNone(val):
  """Checks if the given value is not None.

  """
  return val is not None


def _TNone(val):
  """Checks if the given value is None.

  """
  return val is None


def _TBool(val):
  """Checks if the given value is a boolean.

  """
  return isinstance(val, bool)


def _TInt(val):
  """Checks if the given value is an integer.

  """
  return isinstance(val, int)


def _TFloat(val):
  """Checks if the given value is a float.

  """
  return isinstance(val, float)


def _TString(val):
  """Checks if the given value is a string.

  """
  return isinstance(val, basestring)


def _TTrue(val):
  """Checks if a given value evaluates to a boolean True value.

  """
  return bool(val)


def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list


# Container types
def _TList(val):
  """Checks if the given value is a list.

  """
  return isinstance(val, list)


def _TDict(val):
  """Checks if the given value is a dictionary.

  """
  return isinstance(val, dict)


# Combinator types
def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


# Type aliases

#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a maybe non-empty string
_TMaybeString = _TOr(_TNonEmptyString, _TNone)


#: a maybe boolean (bool or none)
_TMaybeBool = _TOr(_TBool, _TNone)


#: a positive integer
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)

#: a strictly positive integer
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)


def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(my_type(v) for v in lst))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
                                and compat.all(val_type(v)
                                               for v in my_dict.values())))


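# Note (added for clarity, illustrative only): these checkers are plain
# callables returning a boolean, so they compose directly, e.g.
#
#   _TListOf(_TNonEmptyString)(["node1", "node2"])   # -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])        # -> False
#   _TDictOf(_TNonEmptyString, _TInt)({"count": 2})  # -> True
#
# The same composed callables are what the opcode parameter definitions
# below use as their type-check element.
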
# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)

#: the force parameter
_PForce = ("force", False, _TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)


#: a required node name (for single-node LUs)
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)


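# Note (added for clarity): each _P* definition above is a
# (name, default, type-check) tuple. A default of _NoDefault marks the
# parameter as mandatory, a callable default (e.g. _EmptyList) is invoked to
# produce a fresh value, and the third element is one of the _T* checkers.
# LogicalUnit.__init__ below applies these rules to every entry listed in an
# LU's _OP_PARAMS.
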
# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == _NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == _NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An absence of nodes should be expressed as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


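# Illustrative sketch (added, hypothetical, not part of the original code):
# a typical single-instance LU ties the pieces above together roughly like
# this:
#
#   class LUExampleInstanceOp(LogicalUnit):        # hypothetical name
#     HPATH = "instance-example"
#     HTYPE = constants.HTYPE_INSTANCE
#     _OP_PARAMS = [_PInstanceName, _PForce]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()
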
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


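# Illustrative example (added, not normative): with the defaults above,
#
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#
# returns {"vcpus": 4}: the "mem" key is dropped so the cluster default
# applies again, while "vcpus" is overridden. The original dict is left
# untouched because the function works on a deep copy.
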
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


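# Illustrative example (added, not normative): for an instance with one
# bridged NIC and one disk, the function above produces keys such as
#
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS ("up"/"down"),
#   INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC,
#   INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK, INSTANCE_NIC0_BRIDGE,
#   INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend or hypervisor
# parameter; the hooks runner later prefixes all of these with "GANETI_".
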
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  """Return all instances for which fn(instance) is true."""
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of the instance's disks that the given node
  reports as faulty.

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", _EmptyList,
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, _TBool),
    ("error_codes", False, _TBool),
    ("debug_simulate_errors", False, _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by the hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

    - check the result data structure is well formed and has all the mandatory
      fields
    - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

1574
  def _VerifyNodeNetwork(self, ninfo, nresult):
1575
    """Check the node time.
1576

1577
    @type ninfo: L{objects.Node}
1578
    @param ninfo: the node to check
1579
    @param nresult: the remote results for the node
1580

1581
    """
1582
    node = ninfo.name
1583
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1584

    
1585
    test = constants.NV_NODELIST not in nresult
1586
    _ErrorIf(test, self.ENODESSH, node,
1587
             "node hasn't returned node ssh connectivity data")
1588
    if not test:
1589
      if nresult[constants.NV_NODELIST]:
1590
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1591
          _ErrorIf(True, self.ENODESSH, node,
1592
                   "ssh communication with node '%s': %s", a_node, a_msg)
1593

    
1594
    test = constants.NV_NODENETTEST not in nresult
1595
    _ErrorIf(test, self.ENODENET, node,
1596
             "node hasn't returned node tcp connectivity data")
1597
    if not test:
1598
      if nresult[constants.NV_NODENETTEST]:
1599
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1600
        for anode in nlist:
1601
          _ErrorIf(True, self.ENODENET, node,
1602
                   "tcp communication with node '%s': %s",
1603
                   anode, nresult[constants.NV_NODENETTEST][anode])
1604

    
1605
    test = constants.NV_MASTERIP not in nresult
1606
    _ErrorIf(test, self.ENODENET, node,
1607
             "node hasn't returned node master IP reachability data")
1608
    if not test:
1609
      if not nresult[constants.NV_MASTERIP]:
1610
        if node == self.master_node:
1611
          msg = "the master node cannot reach the master IP (not configured?)"
1612
        else:
1613
          msg = "cannot reach the master IP"
1614
        _ErrorIf(True, self.ENODENET, node, msg)
1615

    
1616

    
1617
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1618
    """Verify an instance.
1619

1620
    This function checks to see if the required block devices are
1621
    available on the instance's node.
1622

1623
    """
1624
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1625
    node_current = instanceconfig.primary_node
1626

    
1627
    node_vol_should = {}
1628
    instanceconfig.MapLVsByNode(node_vol_should)
1629

    
1630
    for node in node_vol_should:
1631
      n_img = node_image[node]
1632
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1633
        # ignore missing volumes on offline or broken nodes
1634
        continue
1635
      for volume in node_vol_should[node]:
1636
        test = volume not in n_img.volumes
1637
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1638
                 "volume %s missing on node %s", volume, node)
1639

    
1640
    if instanceconfig.admin_up:
1641
      pri_img = node_image[node_current]
1642
      test = instance not in pri_img.instances and not pri_img.offline
1643
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1644
               "instance not running on its primary node %s",
1645
               node_current)
1646

    
1647
    for node, n_img in node_image.items():
1648
      if node != node_current:
1649
        test = instance in n_img.instances
1650
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1651
                 "instance should not run on node %s", node)
1652

    
1653
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1654
    """Verify if there are any unknown volumes in the cluster.
1655

1656
    The .os, .swap and backup volumes are ignored. All other volumes are
1657
    reported as unknown.
1658

1659
    """
1660
    for node, n_img in node_image.items():
1661
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1662
        # skip non-healthy nodes
1663
        continue
1664
      for volume in n_img.volumes:
1665
        test = (node not in node_vol_should or
1666
                volume not in node_vol_should[node])
1667
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1668
                      "volume %s is unknown", volume)
1669

    
1670
  def _VerifyOrphanInstances(self, instancelist, node_image):
1671
    """Verify the list of running instances.
1672

1673
    This checks what instances are running but unknown to the cluster.
1674

1675
    """
1676
    for node, n_img in node_image.items():
1677
      for o_inst in n_img.instances:
1678
        test = o_inst not in instancelist
1679
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1680
                      "instance %s on node %s should not exist", o_inst, node)
1681

    
1682
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1683
    """Verify N+1 Memory Resilience.
1684

1685
    Check that if one single node dies we can still start all the
1686
    instances it was primary for.
1687

1688
    """
1689
    for node, n_img in node_image.items():
1690
      # This code checks that every node which is now listed as
1691
      # secondary has enough memory to host all instances it is
1692
      # supposed to, should a single other node in the cluster fail.
1693
      # FIXME: not ready for failover to an arbitrary node
1694
      # FIXME: does not support file-backed instances
1695
      # WARNING: we currently take into account down instances as well
1696
      # as up ones, considering that even if they're down someone
1697
      # might want to start them even in the event of a node failure.
1698
      for prinode, instances in n_img.sbp.items():
1699
        needed_mem = 0
1700
        for instance in instances:
1701
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1702
          if bep[constants.BE_AUTO_BALANCE]:
1703
            needed_mem += bep[constants.BE_MEMORY]
1704
        test = n_img.mfree < needed_mem
1705
        self._ErrorIf(test, self.ENODEN1, node,
1706
                      "not enough memory on to accommodate"
1707
                      " failovers should peer node %s fail", prinode)
1708

    
1709
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1710
                       master_files):
1711
    """Verifies and computes the node required file checksums.
1712

1713
    @type ninfo: L{objects.Node}
1714
    @param ninfo: the node to check
1715
    @param nresult: the remote results for the node
1716
    @param file_list: required list of files
1717
    @param local_cksum: dictionary of local files and their checksums
1718
    @param master_files: list of files that only masters should have
1719

1720
    """
1721
    node = ninfo.name
1722
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1723

    
1724
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1725
    test = not isinstance(remote_cksum, dict)
1726
    _ErrorIf(test, self.ENODEFILECHECK, node,
1727
             "node hasn't returned file checksum data")
1728
    if test:
1729
      return
1730

    
1731
    for file_name in file_list:
1732
      node_is_mc = ninfo.master_candidate
1733
      must_have = (file_name not in master_files) or node_is_mc
1734
      # missing
1735
      test1 = file_name not in remote_cksum
1736
      # invalid checksum
1737
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1738
      # existing and good
1739
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1740
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1741
               "file '%s' missing", file_name)
1742
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1743
               "file '%s' has wrong checksum", file_name)
1744
      # not candidate and this is not a must-have file
1745
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1746
               "file '%s' should not exist on non master"
1747
               " candidates (and the file is outdated)", file_name)
1748
      # all good, except non-master/non-must have combination
1749
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1750
               "file '%s' should not exist"
1751
               " on non master candidates", file_name)
1752

    
1753
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1754
                      drbd_map):
1755
    """Verifies and the node DRBD status.
1756

1757
    @type ninfo: L{objects.Node}
1758
    @param ninfo: the node to check
1759
    @param nresult: the remote results for the node
1760
    @param instanceinfo: the dict of instances
1761
    @param drbd_helper: the configured DRBD usermode helper
1762
    @param drbd_map: the DRBD map as returned by
1763
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1764

1765
    """
1766
    node = ninfo.name
1767
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1768

    
1769
    if drbd_helper:
1770
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1771
      test = (helper_result is None)
1772
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1773
               "no drbd usermode helper returned")
1774
      if helper_result:
1775
        status, payload = helper_result
1776
        test = not status
1777
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1778
                 "drbd usermode helper check unsuccessful: %s", payload)
1779
        test = status and (payload != drbd_helper)
1780
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1781
                 "wrong drbd usermode helper: %s", payload)
1782

    
1783
    # compute the DRBD minors
1784
    node_drbd = {}
1785
    for minor, instance in drbd_map[node].items():
1786
      test = instance not in instanceinfo
1787
      _ErrorIf(test, self.ECLUSTERCFG, None,
1788
               "ghost instance '%s' in temporary DRBD map", instance)
1789
        # ghost instance should not be running, but otherwise we
1790
        # don't give double warnings (both ghost instance and
1791
        # unallocated minor in use)
1792
      if test:
1793
        node_drbd[minor] = (instance, False)
1794
      else:
1795
        instance = instanceinfo[instance]
1796
        node_drbd[minor] = (instance.name, instance.admin_up)
1797

    
1798
    # and now check them
1799
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1800
    test = not isinstance(used_minors, (tuple, list))
1801
    _ErrorIf(test, self.ENODEDRBD, node,
1802
             "cannot parse drbd status file: %s", str(used_minors))
1803
    if test:
1804
      # we cannot check drbd status
1805
      return
1806

    
1807
    for minor, (iname, must_exist) in node_drbd.items():
1808
      test = minor not in used_minors and must_exist
1809
      _ErrorIf(test, self.ENODEDRBD, node,
1810
               "drbd minor %d of instance %s is not active", minor, iname)
1811
    for minor in used_minors:
1812
      test = minor not in node_drbd
1813
      _ErrorIf(test, self.ENODEDRBD, node,
1814
               "unallocated drbd minor %d is in use", minor)
1815

    
1816
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1817
    """Builds the node OS structures.
1818

1819
    @type ninfo: L{objects.Node}
1820
    @param ninfo: the node to check
1821
    @param nresult: the remote results for the node
1822
    @param nimg: the node image object
1823

1824
    """
1825
    node = ninfo.name
1826
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1827

    
1828
    remote_os = nresult.get(constants.NV_OSLIST, None)
1829
    test = (not isinstance(remote_os, list) or
1830
            not compat.all(isinstance(v, list) and len(v) == 7
1831
                           for v in remote_os))
1832

    
1833
    _ErrorIf(test, self.ENODEOS, node,
1834
             "node hasn't returned valid OS data")
1835

    
1836
    nimg.os_fail = test
1837

    
1838
    if test:
1839
      return
1840

    
1841
    os_dict = {}
1842

    
1843
    for (name, os_path, status, diagnose,
1844
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1845

    
1846
      if name not in os_dict:
1847
        os_dict[name] = []
1848

    
1849
      # parameters is a list of lists instead of list of tuples due to
1850
      # JSON lacking a real tuple type, fix it:
1851
      parameters = [tuple(v) for v in parameters]
1852
      os_dict[name].append((os_path, status, diagnose,
1853
                            set(variants), set(parameters), set(api_ver)))
1854

    
1855
    nimg.oslist = os_dict
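    # Illustrative sketch (not part of the LU), with hypothetical values: one
    # NV_OSLIST entry is a 7-element list such as
    #   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
    #    ["default"], [["ARG", "descr"]], [15]]
    # which the loop above stores as
    #   nimg.oslist["debootstrap"] = [(path, True, "", {"default"},
    #                                  {("ARG", "descr")}, {15})]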
1856

    
1857
  def _VerifyNodeOS(self, ninfo, nimg, base):
1858
    """Verifies the node OS list.
1859

1860
    @type ninfo: L{objects.Node}
1861
    @param ninfo: the node to check
1862
    @param nimg: the node image object
1863
    @param base: the 'template' node we match against (e.g. from the master)
1864

1865
    """
1866
    node = ninfo.name
1867
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1868

    
1869
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1870

    
1871
    for os_name, os_data in nimg.oslist.items():
1872
      assert os_data, "Empty OS status for OS %s?!" % os_name
1873
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1874
      _ErrorIf(not f_status, self.ENODEOS, node,
1875
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1876
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1877
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1878
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1879
      # this will be caught in the backend too
1880
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1881
               and not f_var, self.ENODEOS, node,
1882
               "OS %s with API at least %d does not declare any variant",
1883
               os_name, constants.OS_API_V15)
1884
      # comparisons with the 'base' image
1885
      test = os_name not in base.oslist
1886
      _ErrorIf(test, self.ENODEOS, node,
1887
               "Extra OS %s not present on reference node (%s)",
1888
               os_name, base.name)
1889
      if test:
1890
        continue
1891
      assert base.oslist[os_name], "Base node has empty OS status?"
1892
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1893
      if not b_status:
1894
        # base OS is invalid, skipping
1895
        continue
1896
      for kind, a, b in [("API version", f_api, b_api),
1897
                         ("variants list", f_var, b_var),
1898
                         ("parameters", f_param, b_param)]:
1899
        _ErrorIf(a != b, self.ENODEOS, node,
1900
                 "OS %s %s differs from reference node %s: %s vs. %s",
1901
                 kind, os_name, base.name,
1902
                 utils.CommaJoin(a), utils.CommaJoin(b))
1903

    
1904
    # check any missing OSes
1905
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1906
    _ErrorIf(missing, self.ENODEOS, node,
1907
             "OSes present on reference node %s but missing on this node: %s",
1908
             base.name, utils.CommaJoin(missing))
1909

    
1910
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1911
    """Verifies and updates the node volume data.
1912

1913
    This function will update a L{NodeImage}'s internal structures
1914
    with data from the remote call.
1915

1916
    @type ninfo: L{objects.Node}
1917
    @param ninfo: the node to check
1918
    @param nresult: the remote results for the node
1919
    @param nimg: the node image object
1920
    @param vg_name: the configured VG name
1921

1922
    """
1923
    node = ninfo.name
1924
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1925

    
1926
    nimg.lvm_fail = True
1927
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1928
    if vg_name is None:
1929
      pass
1930
    elif isinstance(lvdata, basestring):
1931
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1932
               utils.SafeEncode(lvdata))
1933
    elif not isinstance(lvdata, dict):
1934
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1935
    else:
1936
      nimg.volumes = lvdata
1937
      nimg.lvm_fail = False
1938

    
1939
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1940
    """Verifies and updates the node instance list.
1941

1942
    If the listing was successful, then updates this node's instance
1943
    list. Otherwise, it marks the RPC call as failed for the instance
1944
    list key.
1945

1946
    @type ninfo: L{objects.Node}
1947
    @param ninfo: the node to check
1948
    @param nresult: the remote results for the node
1949
    @param nimg: the node image object
1950

1951
    """
1952
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1953
    test = not isinstance(idata, list)
1954
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1955
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1956
    if test:
1957
      nimg.hyp_fail = True
1958
    else:
1959
      nimg.instances = idata
1960

    
1961
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1962
    """Verifies and computes a node information map
1963

1964
    @type ninfo: L{objects.Node}
1965
    @param ninfo: the node to check
1966
    @param nresult: the remote results for the node
1967
    @param nimg: the node image object
1968
    @param vg_name: the configured VG name
1969

1970
    """
1971
    node = ninfo.name
1972
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1973

    
1974
    # try to read free memory (from the hypervisor)
1975
    hv_info = nresult.get(constants.NV_HVINFO, None)
1976
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1977
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1978
    if not test:
1979
      try:
1980
        nimg.mfree = int(hv_info["memory_free"])
1981
      except (ValueError, TypeError):
1982
        _ErrorIf(True, self.ENODERPC, node,
1983
                 "node returned invalid nodeinfo, check hypervisor")
1984

    
1985
    # FIXME: devise a free space model for file based instances as well
1986
    if vg_name is not None:
1987
      test = (constants.NV_VGLIST not in nresult or
1988
              vg_name not in nresult[constants.NV_VGLIST])
1989
      _ErrorIf(test, self.ENODELVM, node,
1990
               "node didn't return data for the volume group '%s'"
1991
               " - it is either missing or broken", vg_name)
1992
      if not test:
1993
        try:
1994
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1995
        except (ValueError, TypeError):
1996
          _ErrorIf(True, self.ENODERPC, node,
1997
                   "node returned invalid LVM info, check LVM status")
1998

    
1999
  def BuildHooksEnv(self):
2000
    """Build hooks env.
2001

2002
    Cluster-Verify hooks are only run in the post phase; a hook failure is
2003
    logged in the verify output and makes the verification fail.
2004

2005
    """
2006
    all_nodes = self.cfg.GetNodeList()
2007
    env = {
2008
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2009
      }
2010
    for node in self.cfg.GetAllNodesInfo().values():
2011
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2012

    
2013
    return env, [], all_nodes
2014

    
2015
  def Exec(self, feedback_fn):
2016
    """Verify integrity of cluster, performing various test on nodes.
2017

2018
    """
2019
    self.bad = False
2020
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2021
    verbose = self.op.verbose
2022
    self._feedback_fn = feedback_fn
2023
    feedback_fn("* Verifying global settings")
2024
    for msg in self.cfg.VerifyConfig():
2025
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2026

    
2027
    # Check the cluster certificates
2028
    for cert_filename in constants.ALL_CERT_FILES:
2029
      (errcode, msg) = _VerifyCertificate(cert_filename)
2030
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2031

    
2032
    vg_name = self.cfg.GetVGName()
2033
    drbd_helper = self.cfg.GetDRBDHelper()
2034
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2035
    cluster = self.cfg.GetClusterInfo()
2036
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2037
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2038
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2039
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2040
                        for iname in instancelist)
2041
    i_non_redundant = [] # Non redundant instances
2042
    i_non_a_balanced = [] # Non auto-balanced instances
2043
    n_offline = 0 # Count of offline nodes
2044
    n_drained = 0 # Count of nodes being drained
2045
    node_vol_should = {}
2046

    
2047
    # FIXME: verify OS list
2048
    # do local checksums
2049
    master_files = [constants.CLUSTER_CONF_FILE]
2050
    master_node = self.master_node = self.cfg.GetMasterNode()
2051
    master_ip = self.cfg.GetMasterIP()
2052

    
2053
    file_names = ssconf.SimpleStore().GetFileList()
2054
    file_names.extend(constants.ALL_CERT_FILES)
2055
    file_names.extend(master_files)
2056
    if cluster.modify_etc_hosts:
2057
      file_names.append(constants.ETC_HOSTS)
2058

    
2059
    local_checksums = utils.FingerprintFiles(file_names)
2060

    
2061
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2062
    node_verify_param = {
2063
      constants.NV_FILELIST: file_names,
2064
      constants.NV_NODELIST: [node.name for node in nodeinfo
2065
                              if not node.offline],
2066
      constants.NV_HYPERVISOR: hypervisors,
2067
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2068
                                  node.secondary_ip) for node in nodeinfo
2069
                                 if not node.offline],
2070
      constants.NV_INSTANCELIST: hypervisors,
2071
      constants.NV_VERSION: None,
2072
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2073
      constants.NV_NODESETUP: None,
2074
      constants.NV_TIME: None,
2075
      constants.NV_MASTERIP: (master_node, master_ip),
2076
      constants.NV_OSLIST: None,
2077
      }
2078

    
2079
    if vg_name is not None:
2080
      node_verify_param[constants.NV_VGLIST] = None
2081
      node_verify_param[constants.NV_LVLIST] = vg_name
2082
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2083
      node_verify_param[constants.NV_DRBDLIST] = None
2084

    
2085
    if drbd_helper:
2086
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2087

    
2088
    # Build our expected cluster state
2089
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2090
                                                 name=node.name))
2091
                      for node in nodeinfo)
2092

    
2093
    for instance in instancelist:
2094
      inst_config = instanceinfo[instance]
2095

    
2096
      for nname in inst_config.all_nodes:
2097
        if nname not in node_image:
2098
          # ghost node
2099
          gnode = self.NodeImage(name=nname)
2100
          gnode.ghost = True
2101
          node_image[nname] = gnode
2102

    
2103
      inst_config.MapLVsByNode(node_vol_should)
2104

    
2105
      pnode = inst_config.primary_node
2106
      node_image[pnode].pinst.append(instance)
2107

    
2108
      for snode in inst_config.secondary_nodes:
2109
        nimg = node_image[snode]
2110
        nimg.sinst.append(instance)
2111
        if pnode not in nimg.sbp:
2112
          nimg.sbp[pnode] = []
2113
        nimg.sbp[pnode].append(instance)
2114

    
2115
    # At this point, we have the in-memory data structures complete,
2116
    # except for the runtime information, which we'll gather next
2117

    
2118
    # Due to the way our RPC system works, exact response times cannot be
2119
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2120
    # time before and after executing the request, we can at least have a time
2121
    # window.
2122
    nvinfo_starttime = time.time()
2123
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2124
                                           self.cfg.GetClusterName())
2125
    nvinfo_endtime = time.time()
2126

    
2127
    all_drbd_map = self.cfg.ComputeDRBDMap()
2128

    
2129
    feedback_fn("* Verifying node status")
2130

    
2131
    refos_img = None
2132

    
2133
    for node_i in nodeinfo:
2134
      node = node_i.name
2135
      nimg = node_image[node]
2136

    
2137
      if node_i.offline:
2138
        if verbose:
2139
          feedback_fn("* Skipping offline node %s" % (node,))
2140
        n_offline += 1
2141
        continue
2142

    
2143
      if node == master_node:
2144
        ntype = "master"
2145
      elif node_i.master_candidate:
2146
        ntype = "master candidate"
2147
      elif node_i.drained:
2148
        ntype = "drained"
2149
        n_drained += 1
2150
      else:
2151
        ntype = "regular"
2152
      if verbose:
2153
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2154

    
2155
      msg = all_nvinfo[node].fail_msg
2156
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2157
      if msg:
2158
        nimg.rpc_fail = True
2159
        continue
2160

    
2161
      nresult = all_nvinfo[node].payload
2162

    
2163
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2164
      self._VerifyNodeNetwork(node_i, nresult)
2165
      self._VerifyNodeLVM(node_i, nresult, vg_name)
2166
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2167
                            master_files)
2168
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2169
                           all_drbd_map)
2170
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2171

    
2172
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2173
      self._UpdateNodeInstances(node_i, nresult, nimg)
2174
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2175
      self._UpdateNodeOS(node_i, nresult, nimg)
2176
      if not nimg.os_fail:
2177
        if refos_img is None:
2178
          refos_img = nimg
2179
        self._VerifyNodeOS(node_i, nimg, refos_img)
2180

    
2181
    feedback_fn("* Verifying instance status")
2182
    for instance in instancelist:
2183
      if verbose:
2184
        feedback_fn("* Verifying instance %s" % instance)
2185
      inst_config = instanceinfo[instance]
2186
      self._VerifyInstance(instance, inst_config, node_image)
2187
      inst_nodes_offline = []
2188

    
2189
      pnode = inst_config.primary_node
2190
      pnode_img = node_image[pnode]
2191
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2192
               self.ENODERPC, pnode, "instance %s, connection to"
2193
               " primary node failed", instance)
2194

    
2195
      if pnode_img.offline:
2196
        inst_nodes_offline.append(pnode)
2197

    
2198
      # If the instance is non-redundant we cannot survive losing its primary
2199
      # node, so we are not N+1 compliant. On the other hand we have no disk
2200
      # templates with more than one secondary so that situation is not well
2201
      # supported either.
2202
      # FIXME: does not support file-backed instances
2203
      if not inst_config.secondary_nodes:
2204
        i_non_redundant.append(instance)
2205
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2206
               instance, "instance has multiple secondary nodes: %s",
2207
               utils.CommaJoin(inst_config.secondary_nodes),
2208
               code=self.ETYPE_WARNING)
2209

    
2210
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2211
        i_non_a_balanced.append(instance)
2212

    
2213
      for snode in inst_config.secondary_nodes:
2214
        s_img = node_image[snode]
2215
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2216
                 "instance %s, connection to secondary node failed", instance)
2217

    
2218
        if s_img.offline:
2219
          inst_nodes_offline.append(snode)
2220

    
2221
      # warn that the instance lives on offline nodes
2222
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2223
               "instance lives on offline node(s) %s",
2224
               utils.CommaJoin(inst_nodes_offline))
2225
      # ... or ghost nodes
2226
      for node in inst_config.all_nodes:
2227
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2228
                 "instance lives on ghost node %s", node)
2229

    
2230
    feedback_fn("* Verifying orphan volumes")
2231
    self._VerifyOrphanVolumes(node_vol_should, node_image)
2232

    
2233
    feedback_fn("* Verifying orphan instances")
2234
    self._VerifyOrphanInstances(instancelist, node_image)
2235

    
2236
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2237
      feedback_fn("* Verifying N+1 Memory redundancy")
2238
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2239

    
2240
    feedback_fn("* Other Notes")
2241
    if i_non_redundant:
2242
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2243
                  % len(i_non_redundant))
2244

    
2245
    if i_non_a_balanced:
2246
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2247
                  % len(i_non_a_balanced))
2248

    
2249
    if n_offline:
2250
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2251

    
2252
    if n_drained:
2253
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2254

    
2255
    return not self.bad
2256

    
2257
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2258
    """Analyze the post-hooks' result
2259

2260
    This method analyses the hook result, handles it, and sends some
2261
    nicely-formatted feedback back to the user.
2262

2263
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2264
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2265
    @param hooks_results: the results of the multi-node hooks rpc call
2266
    @param feedback_fn: function used to send feedback back to the caller
2267
    @param lu_result: previous Exec result
2268
    @return: the new Exec result, based on the previous result
2269
        and hook results
2270

2271
    """
2272
    # We only really run POST phase hooks, and are only interested in
2273
    # their results
2274
    if phase == constants.HOOKS_PHASE_POST:
2275
      # Used to change hooks' output to proper indentation
2276
      indent_re = re.compile('^', re.M)
2277
      feedback_fn("* Hooks Results")
2278
      assert hooks_results, "invalid result from hooks"
2279

    
2280
      for node_name in hooks_results:
2281
        res = hooks_results[node_name]
2282
        msg = res.fail_msg
2283
        test = msg and not res.offline
2284
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2285
                      "Communication failure in hooks execution: %s", msg)
2286
        if res.offline or msg:
2287
          # No need to investigate payload if node is offline or gave an error.
2288
          # override manually lu_result here as _ErrorIf only
2289
          # overrides self.bad
2290
          lu_result = 1
2291
          continue
2292
        for script, hkr, output in res.payload:
2293
          test = hkr == constants.HKR_FAIL
2294
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2295
                        "Script %s failed, output:", script)
2296
          if test:
2297
            output = indent_re.sub('      ', output)
2298
            feedback_fn("%s" % output)
2299
            lu_result = 0
2300

    
2301
      return lu_result
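    # Illustrative note (not part of the LU): indent_re substitutes at every
    # line start (re.M), so a hypothetical two-line hook output
    #   "err: foo\nerr: bar"
    # is fed back as
    #   "      err: foo\n      err: bar"
    # i.e. indented six spaces under the "Script ... failed" message.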
2302

    
2303

    
2304
class LUVerifyDisks(NoHooksLU):
2305
  """Verifies the cluster disks status.
2306

2307
  """
2308
  REQ_BGL = False
2309

    
2310
  def ExpandNames(self):
2311
    self.needed_locks = {
2312
      locking.LEVEL_NODE: locking.ALL_SET,
2313
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2314
    }
2315
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2316

    
2317
  def Exec(self, feedback_fn):
2318
    """Verify integrity of cluster disks.
2319

2320
    @rtype: tuple of three items
2321
    @return: a tuple of (dict of node-to-node_error, list of instances
2322
        which need activate-disks, dict of instance: (node, volume) for
2323
        missing volumes)
2324

2325
    """
2326
    result = res_nodes, res_instances, res_missing = {}, [], {}
2327

    
2328
    vg_name = self.cfg.GetVGName()
2329
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2330
    instances = [self.cfg.GetInstanceInfo(name)
2331
                 for name in self.cfg.GetInstanceList()]
2332

    
2333
    nv_dict = {}
2334
    for inst in instances:
2335
      inst_lvs = {}
2336
      if (not inst.admin_up or
2337
          inst.disk_template not in constants.DTS_NET_MIRROR):
2338
        continue
2339
      inst.MapLVsByNode(inst_lvs)
2340
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2341
      for node, vol_list in inst_lvs.iteritems():
2342
        for vol in vol_list:
2343
          nv_dict[(node, vol)] = inst
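      # Illustrative sketch (not part of the LU), with hypothetical names:
      # for a DRBD instance "inst1" with
      #   inst_lvs == {"node1": ["xenvg/lv1"], "node2": ["xenvg/lv1"]}
      # the loop above adds
      #   nv_dict[("node1", "xenvg/lv1")] = inst1
      #   nv_dict[("node2", "xenvg/lv1")] = inst1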
2344

    
2345
    if not nv_dict:
2346
      return result
2347

    
2348
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2349

    
2350
    for node in nodes:
2351
      # node_volume
2352
      node_res = node_lvs[node]
2353
      if node_res.offline:
2354
        continue
2355
      msg = node_res.fail_msg
2356
      if msg:
2357
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2358
        res_nodes[node] = msg
2359
        continue
2360

    
2361
      lvs = node_res.payload
2362
      for lv_name, (_, _, lv_online) in lvs.items():
2363
        inst = nv_dict.pop((node, lv_name), None)
2364
        if (not lv_online and inst is not None
2365
            and inst.name not in res_instances):
2366
          res_instances.append(inst.name)
2367

    
2368
    # any leftover items in nv_dict are missing LVs, let's arrange the
2369
    # data better
2370
    for key, inst in nv_dict.iteritems():
2371
      if inst.name not in res_missing:
2372
        res_missing[inst.name] = []
2373
      res_missing[inst.name].append(key)
2374

    
2375
    return result
2376

    
2377

    
2378
class LURepairDiskSizes(NoHooksLU):
2379
  """Verifies the cluster disks sizes.
2380

2381
  """
2382
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2383
  REQ_BGL = False
2384

    
2385
  def ExpandNames(self):
2386
    if self.op.instances:
2387
      self.wanted_names = []
2388
      for name in self.op.instances:
2389
        full_name = _ExpandInstanceName(self.cfg, name)
2390
        self.wanted_names.append(full_name)
2391
      self.needed_locks = {
2392
        locking.LEVEL_NODE: [],
2393
        locking.LEVEL_INSTANCE: self.wanted_names,
2394
        }
2395
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2396
    else:
2397
      self.wanted_names = None
2398
      self.needed_locks = {
2399
        locking.LEVEL_NODE: locking.ALL_SET,
2400
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2401
        }
2402
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2403

    
2404
  def DeclareLocks(self, level):
2405
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2406
      self._LockInstancesNodes(primary_only=True)
2407

    
2408
  def CheckPrereq(self):
2409
    """Check prerequisites.
2410

2411
    This only checks the optional instance list against the existing names.
2412

2413
    """
2414
    if self.wanted_names is None:
2415
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2416

    
2417
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2418
                             in self.wanted_names]
2419

    
2420
  def _EnsureChildSizes(self, disk):
2421
    """Ensure children of the disk have the needed disk size.
2422

2423
    This is valid mainly for DRBD8 and fixes an issue where the
2424
    children have a smaller disk size.
2425

2426
    @param disk: an L{ganeti.objects.Disk} object
2427

2428
    """
2429
    if disk.dev_type == constants.LD_DRBD8:
2430
      assert disk.children, "Empty children for DRBD8?"
2431
      fchild = disk.children[0]
2432
      mismatch = fchild.size < disk.size
2433
      if mismatch:
2434
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2435
                     fchild.size, disk.size)
2436
        fchild.size = disk.size
2437

    
2438
      # and we recurse on this child only, not on the metadev
2439
      return self._EnsureChildSizes(fchild) or mismatch
2440
    else:
2441
      return False
2442

    
2443
  def Exec(self, feedback_fn):
2444
    """Verify the size of cluster disks.
2445

2446
    """
2447
    # TODO: check child disks too
2448
    # TODO: check differences in size between primary/secondary nodes
2449
    per_node_disks = {}
2450
    for instance in self.wanted_instances:
2451
      pnode = instance.primary_node
2452
      if pnode not in per_node_disks:
2453
        per_node_disks[pnode] = []
2454
      for idx, disk in enumerate(instance.disks):
2455
        per_node_disks[pnode].append((instance, idx, disk))
2456

    
2457
    changed = []
2458
    for node, dskl in per_node_disks.items():
2459
      newl = [v[2].Copy() for v in dskl]
2460
      for dsk in newl:
2461
        self.cfg.SetDiskID(dsk, node)
2462
      result = self.rpc.call_blockdev_getsizes(node, newl)
2463
      if result.fail_msg:
2464
        self.LogWarning("Failure in blockdev_getsizes call to node"
2465
                        " %s, ignoring", node)
2466
        continue
2467
      if len(result.data) != len(dskl):
2468
        self.LogWarning("Invalid result from node %s, ignoring node results",
2469
                        node)
2470
        continue
2471
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2472
        if size is None:
2473
          self.LogWarning("Disk %d of instance %s did not return size"
2474
                          " information, ignoring", idx, instance.name)
2475
          continue
2476
        if not isinstance(size, (int, long)):
2477
          self.LogWarning("Disk %d of instance %s did not return valid"
2478
                          " size information, ignoring", idx, instance.name)
2479
          continue
2480
        size = size >> 20
2481
        if size != disk.size:
2482
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2483
                       " correcting: recorded %d, actual %d", idx,
2484
                       instance.name, disk.size, size)
2485
          disk.size = size
2486
          self.cfg.Update(instance, feedback_fn)
2487
          changed.append((instance.name, idx, size))
2488
        if self._EnsureChildSizes(disk):
2489
          self.cfg.Update(instance, feedback_fn)
2490
          changed.append((instance.name, idx, disk.size))
2491
    return changed
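    # Illustrative note (not part of the LU): blockdev_getsizes is expected to
    # report sizes in bytes, so "size >> 20" above converts to MiB before the
    # comparison with disk.size, e.g. 10737418240 >> 20 == 10240 (10 GiB).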
2492

    
2493

    
2494
class LURenameCluster(LogicalUnit):
2495
  """Rename the cluster.
2496

2497
  """
2498
  HPATH = "cluster-rename"
2499
  HTYPE = constants.HTYPE_CLUSTER
2500
  _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2501

    
2502
  def BuildHooksEnv(self):
2503
    """Build hooks env.
2504

2505
    """
2506
    env = {
2507
      "OP_TARGET": self.cfg.GetClusterName(),
2508
      "NEW_NAME": self.op.name,
2509
      }
2510
    mn = self.cfg.GetMasterNode()
2511
    all_nodes = self.cfg.GetNodeList()
2512
    return env, [mn], all_nodes
2513

    
2514
  def CheckPrereq(self):
2515
    """Verify that the passed name is a valid one.
2516

2517
    """
2518
    hostname = netutils.GetHostInfo(self.op.name)
2519

    
2520
    new_name = hostname.name
2521
    self.ip = new_ip = hostname.ip
2522
    old_name = self.cfg.GetClusterName()
2523
    old_ip = self.cfg.GetMasterIP()
2524
    if new_name == old_name and new_ip == old_ip:
2525
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2526
                                 " cluster has changed",
2527
                                 errors.ECODE_INVAL)
2528
    if new_ip != old_ip:
2529
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2530
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2531
                                   " reachable on the network. Aborting." %
2532
                                   new_ip, errors.ECODE_NOTUNIQUE)
2533

    
2534
    self.op.name = new_name
2535

    
2536
  def Exec(self, feedback_fn):
2537
    """Rename the cluster.
2538

2539
    """
2540
    clustername = self.op.name
2541
    ip = self.ip
2542

    
2543
    # shutdown the master IP
2544
    master = self.cfg.GetMasterNode()
2545
    result = self.rpc.call_node_stop_master(master, False)
2546
    result.Raise("Could not disable the master role")
2547

    
2548
    try:
2549
      cluster = self.cfg.GetClusterInfo()
2550
      cluster.cluster_name = clustername
2551
      cluster.master_ip = ip
2552
      self.cfg.Update(cluster, feedback_fn)
2553

    
2554
      # update the known hosts file
2555
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2556
      node_list = self.cfg.GetNodeList()
2557
      try:
2558
        node_list.remove(master)
2559
      except ValueError:
2560
        pass
2561
      result = self.rpc.call_upload_file(node_list,
2562
                                         constants.SSH_KNOWN_HOSTS_FILE)
2563
      for to_node, to_result in result.iteritems():
2564
        msg = to_result.fail_msg
2565
        if msg:
2566
          msg = ("Copy of file %s to node %s failed: %s" %
2567
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2568
          self.proc.LogWarning(msg)
2569

    
2570
    finally:
2571
      result = self.rpc.call_node_start_master(master, False, False)
2572
      msg = result.fail_msg
2573
      if msg:
2574
        self.LogWarning("Could not re-enable the master role on"
2575
                        " the master, please restart manually: %s", msg)
2576

    
2577

    
2578
class LUSetClusterParams(LogicalUnit):
2579
  """Change the parameters of the cluster.
2580

2581
  """
2582
  HPATH = "cluster-modify"
2583
  HTYPE = constants.HTYPE_CLUSTER
2584
  _OP_PARAMS = [
2585
    ("vg_name", None, _TMaybeString),
2586
    ("enabled_hypervisors", None,
2587
     _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2588
    ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2589
    ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2590
    ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2591
    ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2592
    ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2593
    ("uid_pool", None, _NoType),
2594
    ("add_uids", None, _NoType),
2595
    ("remove_uids", None, _NoType),
2596
    ("maintain_node_health", None, _TMaybeBool),
2597
    ("nicparams", None, _TOr(_TDict, _TNone)),
2598
    ("drbd_helper", None, _TOr(_TString, _TNone)),
2599
    ("default_iallocator", None, _TMaybeString),
2600
    ]
2601
  REQ_BGL = False
2602

    
2603
  def CheckArguments(self):
2604
    """Check parameters
2605

2606
    """
2607
    if self.op.uid_pool:
2608
      uidpool.CheckUidPool(self.op.uid_pool)
2609

    
2610
    if self.op.add_uids:
2611
      uidpool.CheckUidPool(self.op.add_uids)
2612

    
2613
    if self.op.remove_uids:
2614
      uidpool.CheckUidPool(self.op.remove_uids)
2615

    
2616
  def ExpandNames(self):
2617
    # FIXME: in the future maybe other cluster params won't require checking on
2618
    # all nodes to be modified.
2619
    self.needed_locks = {
2620
      locking.LEVEL_NODE: locking.ALL_SET,
2621
    }
2622
    self.share_locks[locking.LEVEL_NODE] = 1
2623

    
2624
  def BuildHooksEnv(self):
2625
    """Build hooks env.
2626

2627
    """
2628
    env = {
2629
      "OP_TARGET": self.cfg.GetClusterName(),
2630
      "NEW_VG_NAME": self.op.vg_name,
2631
      }
2632
    mn = self.cfg.GetMasterNode()
2633
    return env, [mn], [mn]
2634

    
2635
  def CheckPrereq(self):
2636
    """Check prerequisites.
2637

2638
    This checks whether the given params don't conflict and
2639
    if the given volume group is valid.
2640

2641
    """
2642
    if self.op.vg_name is not None and not self.op.vg_name:
2643
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2644
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2645
                                   " instances exist", errors.ECODE_INVAL)
2646

    
2647
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2648
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2649
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2650
                                   " drbd-based instances exist",
2651
                                   errors.ECODE_INVAL)
2652

    
2653
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2654

    
2655
    # if vg_name is not None, check the given volume group on all nodes
2656
    if self.op.vg_name:
2657
      vglist = self.rpc.call_vg_list(node_list)
2658
      for node in node_list:
2659
        msg = vglist[node].fail_msg
2660
        if msg:
2661
          # ignoring down node
2662
          self.LogWarning("Error while gathering data on node %s"
2663
                          " (ignoring node): %s", node, msg)
2664
          continue
2665
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2666
                                              self.op.vg_name,
2667
                                              constants.MIN_VG_SIZE)
2668
        if vgstatus:
2669
          raise errors.OpPrereqError("Error on node '%s': %s" %
2670
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2671

    
2672
    if self.op.drbd_helper:
2673
      # checks given drbd helper on all nodes
2674
      helpers = self.rpc.call_drbd_helper(node_list)
2675
      for node in node_list:
2676
        ninfo = self.cfg.GetNodeInfo(node)
2677
        if ninfo.offline:
2678
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2679
          continue
2680
        msg = helpers[node].fail_msg
2681
        if msg:
2682
          raise errors.OpPrereqError("Error checking drbd helper on node"
2683
                                     " '%s': %s" % (node, msg),
2684
                                     errors.ECODE_ENVIRON)
2685
        node_helper = helpers[node].payload
2686
        if node_helper != self.op.drbd_helper:
2687
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2688
                                     (node, node_helper), errors.ECODE_ENVIRON)
2689

    
2690
    self.cluster = cluster = self.cfg.GetClusterInfo()
2691
    # validate params changes
2692
    if self.op.beparams:
2693
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2694
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2695

    
2696
    if self.op.nicparams:
2697
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2698
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2699
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2700
      nic_errors = []
2701

    
2702
      # check all instances for consistency
2703
      for instance in self.cfg.GetAllInstancesInfo().values():
2704
        for nic_idx, nic in enumerate(instance.nics):
2705
          params_copy = copy.deepcopy(nic.nicparams)
2706
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2707

    
2708
          # check parameter syntax
2709
          try:
2710
            objects.NIC.CheckParameterSyntax(params_filled)
2711
          except errors.ConfigurationError, err:
2712
            nic_errors.append("Instance %s, nic/%d: %s" %
2713
                              (instance.name, nic_idx, err))
2714

    
2715
          # if we're moving instances to routed, check that they have an ip
2716
          target_mode = params_filled[constants.NIC_MODE]
2717
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2718
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2719
                              (instance.name, nic_idx))
2720
      if nic_errors:
2721
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2722
                                   "\n".join(nic_errors))
2723

    
2724
    # hypervisor list/parameters
2725
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2726
    if self.op.hvparams:
2727
      for hv_name, hv_dict in self.op.hvparams.items():
2728
        if hv_name not in self.new_hvparams:
2729
          self.new_hvparams[hv_name] = hv_dict
2730
        else:
2731
          self.new_hvparams[hv_name].update(hv_dict)
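    # Illustrative sketch (not part of the LU), with hypothetical values: the
    # merge above is a per-hypervisor dict update, e.g.
    #   cluster.hvparams == {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}
    #   self.op.hvparams == {"xen-pvm": {"root_path": "/dev/xvda1"}}
    # leaves both keys set for "xen-pvm", while a hypervisor not yet present
    # in new_hvp would be added wholesale.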
2732

    
2733
    # os hypervisor parameters
2734
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2735
    if self.op.os_hvp:
2736
      for os_name, hvs in self.op.os_hvp.items():
2737
        if os_name not in self.new_os_hvp:
2738
          self.new_os_hvp[os_name] = hvs
2739
        else:
2740
          for hv_name, hv_dict in hvs.items():
2741
            if hv_name not in self.new_os_hvp[os_name]:
2742
              self.new_os_hvp[os_name][hv_name] = hv_dict
2743
            else:
2744
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2745

    
2746
    # os parameters
2747
    self.new_osp = objects.FillDict(cluster.osparams, {})
2748
    if self.op.osparams:
2749
      for os_name, osp in self.op.osparams.items():
2750
        if os_name not in self.new_osp:
2751
          self.new_osp[os_name] = {}
2752

    
2753
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2754
                                                  use_none=True)
2755

    
2756
        if not self.new_osp[os_name]:
2757
          # we removed all parameters
2758
          del self.new_osp[os_name]
2759
        else:
2760
          # check the parameter validity (remote check)
2761
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2762
                         os_name, self.new_osp[os_name])
2763

    
2764
    # changes to the hypervisor list
2765
    if self.op.enabled_hypervisors is not None:
2766
      self.hv_list = self.op.enabled_hypervisors
2767
      for hv in self.hv_list:
2768
        # if the hypervisor doesn't already exist in the cluster
2769
        # hvparams, we initialize it to empty, and then (in both
2770
        # cases) we make sure to fill the defaults, as we might not
2771
        # have a complete defaults list if the hypervisor wasn't
2772
        # enabled before
2773
        if hv not in new_hvp:
2774
          new_hvp[hv] = {}
2775
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2776
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2777
    else:
2778
      self.hv_list = cluster.enabled_hypervisors
2779

    
2780
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2781
      # either the enabled list has changed, or the parameters have, validate
2782
      for hv_name, hv_params in self.new_hvparams.items():
2783
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2784
            (self.op.enabled_hypervisors and
2785
             hv_name in self.op.enabled_hypervisors)):
2786
          # either this is a new hypervisor, or its parameters have changed
2787
          hv_class = hypervisor.GetHypervisor(hv_name)
2788
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2789
          hv_class.CheckParameterSyntax(hv_params)
2790
          _CheckHVParams(self, node_list, hv_name, hv_params)
2791

    
2792
    if self.op.os_hvp:
2793
      # no need to check any newly-enabled hypervisors, since the
2794
      # defaults have already been checked in the above code-block
2795
      for os_name, os_hvp in self.new_os_hvp.items():
2796
        for hv_name, hv_params in os_hvp.items():
2797
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2798
          # we need to fill in the new os_hvp on top of the actual hv_p
2799
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2800
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2801
          hv_class = hypervisor.GetHypervisor(hv_name)
2802
          hv_class.CheckParameterSyntax(new_osp)
2803
          _CheckHVParams(self, node_list, hv_name, new_osp)
2804

    
2805
    if self.op.default_iallocator:
2806
      alloc_script = utils.FindFile(self.op.default_iallocator,
2807
                                    constants.IALLOCATOR_SEARCH_PATH,
2808
                                    os.path.isfile)
2809
      if alloc_script is None:
2810
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2811
                                   " specified" % self.op.default_iallocator,
2812
                                   errors.ECODE_INVAL)
2813

    
2814
  def Exec(self, feedback_fn):
2815
    """Change the parameters of the cluster.
2816

2817
    """
2818
    if self.op.vg_name is not None:
2819
      new_volume = self.op.vg_name
2820
      if not new_volume:
2821
        new_volume = None
2822
      if new_volume != self.cfg.GetVGName():
2823
        self.cfg.SetVGName(new_volume)
2824
      else:
2825
        feedback_fn("Cluster LVM configuration already in desired"
2826
                    " state, not changing")
2827
    if self.op.drbd_helper is not None:
2828
      new_helper = self.op.drbd_helper
2829
      if not new_helper:
2830
        new_helper = None
2831
      if new_helper != self.cfg.GetDRBDHelper():
2832
        self.cfg.SetDRBDHelper(new_helper)
2833
      else:
2834
        feedback_fn("Cluster DRBD helper already in desired state,"
2835
                    " not changing")
2836
    if self.op.hvparams:
2837
      self.cluster.hvparams = self.new_hvparams
2838
    if self.op.os_hvp:
2839
      self.cluster.os_hvp = self.new_os_hvp
2840
    if self.op.enabled_hypervisors is not None:
2841
      self.cluster.hvparams = self.new_hvparams
2842
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2843
    if self.op.beparams:
2844
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2845
    if self.op.nicparams:
2846
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2847
    if self.op.osparams:
2848
      self.cluster.osparams = self.new_osp
2849

    
2850
    if self.op.candidate_pool_size is not None:
2851
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2852
      # we need to update the pool size here, otherwise the save will fail
2853
      _AdjustCandidatePool(self, [])
2854

    
2855
    if self.op.maintain_node_health is not None:
2856
      self.cluster.maintain_node_health = self.op.maintain_node_health
2857

    
2858
    if self.op.add_uids is not None:
2859
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2860

    
2861
    if self.op.remove_uids is not None:
2862
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2863

    
2864
    if self.op.uid_pool is not None:
2865
      self.cluster.uid_pool = self.op.uid_pool
2866

    
2867
    if self.op.default_iallocator is not None:
2868
      self.cluster.default_iallocator = self.op.default_iallocator
2869

    
2870
    self.cfg.Update(self.cluster, feedback_fn)
2871

    
2872

    
2873
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
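  # Illustrative usage within this module: LURedistributeConfig and
  # LURemoveNode call _RedistributeAncillaryFiles(lu) to refresh all
  # online nodes, while LUAddNode passes additional_nodes=[node] so that
  # a node not yet present in the configuration also receives the files.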


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

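  # The loop below polls the mirror status of all disks once per
  # iteration: RPC failures are retried up to 10 times with a 6 second
  # pause, a finished-but-degraded state is re-checked for a few one
  # second rounds, and otherwise we sleep for the largest estimated
  # remaining time, capped at 60 seconds.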
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary
3082

3083
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
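      # An OS is considered valid only if its first entry on every node
      # reports a valid status; variants, parameters and API versions are
      # reduced to the intersection across nodes, so only commonly
      # supported values are reported.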
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

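    # Reverse maps from node name to the sets of primary and secondary
    # instances; they are only filled in below when instance-related
    # output fields were actually requested.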
    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("name", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("changes", _NoDefault, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.
3634

3635
    """
3636
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3637
    result = self.rpc.call_storage_modify(self.op.node_name,
3638
                                          self.op.storage_type, st_args,
3639
                                          self.op.name, self.op.changes)
3640
    result.Raise("Failed to modify storage unit '%s' on %s" %
3641
                 (self.op.name, self.op.node_name))
3642

    
3643

    
3644
class LUAddNode(LogicalUnit):
3645
  """Logical unit for adding node to the cluster.
3646

3647
  """
3648
  HPATH = "node-add"
3649
  HTYPE = constants.HTYPE_NODE
3650
  _OP_PARAMS = [
3651
    _PNodeName,
3652
    ("primary_ip", None, _NoType),
3653
    ("secondary_ip", None, _TMaybeString),
3654
    ("readd", False, _TBool),
3655
    ]
3656

    
3657
  def CheckArguments(self):
3658
    # validate/normalize the node name
3659
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3660

    
3661
  def BuildHooksEnv(self):
3662
    """Build hooks env.
3663

3664
    This will run on all nodes before, and on all nodes + the new node after.
3665

3666
    """
3667
    env = {
3668
      "OP_TARGET": self.op.node_name,
3669
      "NODE_NAME": self.op.node_name,
3670
      "NODE_PIP": self.op.primary_ip,
3671
      "NODE_SIP": self.op.secondary_ip,
3672
      }
3673
    nodes_0 = self.cfg.GetNodeList()
3674
    nodes_1 = nodes_0 + [self.op.node_name, ]
3675
    return env, nodes_0, nodes_1
3676

    
3677
  def CheckPrereq(self):
3678
    """Check prerequisites.
3679

3680
    This checks:
3681
     - the new node is not already in the config
3682
     - it is resolvable
3683
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = netutils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      self.op.secondary_ip = primary_ip
    if not netutils.IsValidIP4(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

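    # Ensure the new node's primary and secondary IPs do not collide with
    # the addresses of any existing node; on a re-add, the node's own
    # (possibly updated) entry is skipped.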
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, _TMaybeBool),
    ("offline", None, _TMaybeBool),
    ("drained", None, _TMaybeBool),
    ("auto_promote", False, _TBool),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag changes against the node's current
    state and the cluster's master candidate requirements.

    """
3945
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3946

    
3947
    if (self.op.master_candidate is not None or
3948
        self.op.drained is not None or
3949
        self.op.offline is not None):
3950
      # we can't change the master's node flags
3951
      if self.op.node_name == self.cfg.GetMasterNode():
3952
        raise errors.OpPrereqError("The master role can be changed"
3953
                                   " only via masterfailover",
3954
                                   errors.ECODE_INVAL)
3955

    
3956

    
3957
    if node.master_candidate and self.might_demote and not self.lock_all:
3958
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3959
      # check if after removing the current node, we're missing master
3960
      # candidates
3961
      (mc_remaining, mc_should, _) = \
3962
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3963
      if mc_remaining < mc_should:
3964
        raise errors.OpPrereqError("Not enough master candidates, please"
3965
                                   " pass auto_promote to allow promotion",
3966
                                   errors.ECODE_INVAL)
3967

    
3968
    if (self.op.master_candidate == True and
3969
        ((node.offline and not self.op.offline == False) or
3970
         (node.drained and not self.op.drained == False))):
3971
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3972
                                 " to master_candidate" % node.name,
3973
                                 errors.ECODE_INVAL)
3974

    
3975
    # If we're being deofflined/drained, we'll MC ourself if needed
3976
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3977
        self.op.master_candidate == True and not node.master_candidate):
3978
      self.op.master_candidate = _DecideSelfPromotion(self)
3979
      if self.op.master_candidate:
3980
        self.LogInfo("Autopromoting node to master candidate")
3981

    
3982
    return
3983

    
3984
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

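    # Setting a node offline or drained below also demotes it from master
    # candidate and clears the other of the two flags; every change is
    # recorded in 'result' so it can be reported back to the caller.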
    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
4243
  device_info = []
4244
  disks_ok = True
4245
  iname = instance.name
4246
  disks = _ExpandCheckDisks(instance, disks)
4247

    
4248
  # With the two passes mechanism we try to reduce the window of
4249
  # opportunity for the race condition of switching DRBD to primary
4250
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
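  # Note on the 'force' semantics: it is passed through as
  # ignore_secondaries, so with force set, assembly errors on secondary
  # nodes do not mark the disks as failed; the hint below about '--force'
  # is only shown when force was given explicitly as False.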
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4312
                                           ignore_secondaries=force)
4313
  if not disks_ok:
4314
    _ShutdownInstanceDisks(lu, instance)
4315
    if force is not None and not force:
4316
      lu.proc.LogWarning("", hint="If the message above refers to a"
4317
                         " secondary node,"
4318
                         " you can retry the operation using '--force'.")
4319
    raise errors.OpExecError("Disk consistency error")
4320

    
4321

    
4322
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


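# Illustrative sketch (not part of the original module): how callers are
# expected to use _ExpandCheckDisks; "instance" stands for any objects.Instance
# already loaded from the configuration.
#
#   _ExpandCheckDisks(instance, None)                # acts on all disks
#   _ExpandCheckDisks(instance, instance.disks[:1])  # acts on the first disk
#   # passing disks that do not belong to the instance raises ProgrammerError

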
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  errors on any other node always result in a False return value.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


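# Illustrative note (not part of the original module): the effect of the
# ignore_primary flag of _ShutdownInstanceDisks above.
#
#   _ShutdownInstanceDisks(lu, instance)                       # an RPC error
#   #   on any node makes the function return False
#   _ShutdownInstanceDisks(lu, instance, ignore_primary=True)  # errors on the
#   #   primary node are only logged; errors elsewhere still return False

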
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if any node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


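# Illustrative sketch (not part of the original module): how logical units
# typically call the two prerequisite helpers above; the 512/1024 MiB values
# are made-up examples.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        512, instance.hypervisor)
#   _CheckNodesFreeDisk(self, [instance.primary_node], 1024)
#   # both raise errors.OpPrereqError if the resources are missing or the
#   # node cannot be queried

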
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


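# Illustrative note (not part of the original module): the _OP_PARAMS entries
# used by the logical units in this file are (name, default, check-function)
# tuples; the concrete parameters below mirror ones used elsewhere in this
# file and are shown only as an example.
#
#   _OP_PARAMS = [
#     _PInstanceName,                          # shared, predefined parameter
#     ("force_variant", False, _TBool),        # name, default, type check
#     ("os_type", _NoDefault, _TMaybeString),  # _NoDefault makes it mandatory
#     ]

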
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, _TBool),
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


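# Illustrative note (not part of the original module): how the reboot types
# are handled by LURebootInstance.Exec above.
#
#   INSTANCE_REBOOT_SOFT / INSTANCE_REBOOT_HARD -> delegated to a single
#       call_instance_reboot RPC on the primary node
#   any other member of REBOOT_TYPES (full reboot) -> instance shutdown, disk
#       deactivation/reactivation, then a fresh call_instance_start

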
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


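# Illustrative sketch (not part of the original module): how the "disks"
# opcode parameter of LURecreateInstanceDisks maps to the to_skip list, for a
# hypothetical instance with three disks.
#
#   op.disks = []      -> every disk is recreated (to_skip == [])
#   op.disks = [0, 2]  -> to_skip == [1]; only disks 0 and 2 are recreated
#   op.disks = [5]     -> rejected in CheckPrereq ("Invalid disk index")

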
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", _NoDefault, _TNonEmptyString),
    ("ignore_ip", False, _TBool),
    ("check_name", True, _TBool),
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    if self.op.check_name:
      name_info = netutils.GetHostInfo(self.op.new_name)
      self.op.new_name = name_info.name

    new_name = self.op.new_name

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not self.op.ignore_ip:
      if netutils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


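# Illustrative note (not part of the original module): for file-based disk
# templates the rename above also moves the storage directory; the path below
# is a made-up example of how it is derived from the first disk's logical_id.
#
#   inst.disks[0].logical_id[1]  -> ".../file-storage/old-name/disk0"
#   os.path.dirname(...)         -> ".../file-storage/old-name"
#   # call_file_storage_dir_rename then renames that directory for the new name

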
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, _TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")


  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


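# Illustrative note (not part of the original module): examples of field names
# accepted by LUQueryInstances.Exec above, matching the _FIELDS_STATIC and
# _FIELDS_DYNAMIC declarations.
#
#   "name", "os", "pnode", "snodes"    # simple/static fields
#   "disk.sizes", "disk.size/0"        # per-disk variable fields
#   "nic.macs", "nic.ip/1"             # per-NIC variable fields
#   "be/memory", "hv/<hv parameter>"   # backend/hypervisor parameters
#   "oper_state", "status"             # dynamic fields (need node queries)

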
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, _TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("live", True, _TBool),
    ("cleanup", False, _TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", _NoDefault, _TNonEmptyString),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("live", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create one migration tasklet for each instance having this node as
    # its primary node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
5768
  def __init__(self, lu, instance_name, live, cleanup):
5769
    """Initializes this class.
5770

5771
    """
5772
    Tasklet.__init__(self, lu)
5773

    
5774
    # Parameters
5775
    self.instance_name = instance_name
5776
    self.live = live
5777
    self.cleanup = cleanup
5778

    
5779
  def CheckPrereq(self):
5780
    """Check prerequisites.
5781

5782
    This checks that the instance is in the cluster.
5783

5784
    """
5785
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5786
    instance = self.cfg.GetInstanceInfo(instance_name)
5787
    assert instance is not None
5788

    
5789
    if instance.disk_template != constants.DT_DRBD8:
5790
      raise errors.OpPrereqError("Instance's disk layout is not"
5791
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5792

    
5793
    secondary_nodes = instance.secondary_nodes
5794
    if not secondary_nodes:
5795
      raise errors.ConfigurationError("No secondary node but using"
5796
                                      " drbd8 disk template")
5797

    
5798
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5799

    
5800
    target_node = secondary_nodes[0]
5801
    # check memory requirements on the secondary node
5802
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5803
                         instance.name, i_be[constants.BE_MEMORY],
5804
                         instance.hypervisor)
5805

    
5806
    # check bridge existance
5807
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5808

    
5809
    if not self.cleanup:
5810
      _CheckNodeNotDrained(self.lu, target_node)
5811
      result = self.rpc.call_instance_migratable(instance.primary_node,
5812
                                                 instance)
5813
      result.Raise("Can't migrate, please use failover",
5814
                   prereq=True, ecode=errors.ECODE_STATE)
5815

    
5816
    self.instance = instance
5817

    
5818
  def _WaitUntilSync(self):
5819
    """Poll with custom rpc for disk sync.
5820

5821
    This uses our own step-based rpc call.
5822

5823
    """
5824
    self.feedback_fn("* wait until resync is done")
5825
    all_done = False
5826
    while not all_done:
5827
      all_done = True
5828
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5829
                                            self.nodes_ip,
5830
                                            self.instance.disks)
5831
      min_percent = 100
5832
      for node, nres in result.items():
5833
        nres.Raise("Cannot resync disks on node %s" % node)
5834
        node_done, node_percent = nres.payload
5835
        all_done = all_done and node_done
5836
        if node_percent is not None:
5837
          min_percent = min(min_percent, node_percent)
5838
      if not all_done:
5839
        if min_percent < 100:
5840
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5841
        time.sleep(2)
5842

    
5843
  def _EnsureSecondary(self, node):
5844
    """Demote a node to secondary.
5845

5846
    """
5847
    self.feedback_fn("* switching node %s to secondary mode" % node)
5848

    
5849
    for dev in self.instance.disks:
5850
      self.cfg.SetDiskID(dev, node)
5851

    
5852
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5853
                                          self.instance.disks)
5854
    result.Raise("Cannot change disk to secondary on node %s" % node)
5855

    
5856
  def _GoStandalone(self):
5857
    """Disconnect from the network.
5858

5859
    """
5860
    self.feedback_fn("* changing into standalone mode")
5861
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5862
                                               self.instance.disks)
5863
    for node, nres in result.items():
5864
      nres.Raise("Cannot disconnect disks node %s" % node)
5865

    
5866
  def _GoReconnect(self, multimaster):
5867
    """Reconnect to the network.
5868

5869
    """
5870
    if multimaster:
5871
      msg = "dual-master"
5872
    else:
5873
      msg = "single-master"
5874
    self.feedback_fn("* changing disks into %s mode" % msg)
5875
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5876
                                           self.instance.disks,
5877
                                           self.instance.name, multimaster)
5878
    for node, nres in result.items():
5879
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
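    # Note (added, not in the original source): the addresses gathered above
    # are the nodes' secondary IPs; they are what the DRBD attach/disconnect
    # calls and the migration RPCs above are pointed at.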

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device whose
      CreateOnSecondary() method returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True
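  # Illustrative note (added, not in the original source): for disks that
  # must also exist on the secondary node (e.g. a DRBD8 device and its LV
  # children), CreateOnSecondary() flips force_create to True here, so the
  # recursion below creates the whole subtree on that node as well.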

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
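  # Resulting layout (descriptive note, added, not in the original source):
  # one LD_DRBD8 disk of the requested size whose children are the data LV
  # (names[0], full size) and a 128 MB metadata LV (names[1]); the two
  # nodes, port, minors and shared secret are carried in logical_id.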
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
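    # Layout note (added, not in the original source): names ends up as
    # [disk0_data, disk0_meta, disk1_data, disk1_meta, ...] while minors is
    # [p_minor0, s_minor0, p_minor1, s_minor1, ...], which is why the loop
    # below slices names[idx*2:idx*2+2] and picks minors[idx*2] and
    # minors[idx*2+1].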
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
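      # Note (added, not in the original source): f_create serves as both
      # force_create and force_open, so devices are force-created and opened
      # only on the (effective) primary node; the secondary only gets the
      # devices that CreateOnSecondary() requires.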


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }
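  # Worked example (added, not in the original source): for two 1024 MB
  # disks, DT_PLAIN needs 1024 + 1024 = 2048 MB of free VG space, DT_DRBD8
  # needs (1024 + 128) + (1024 + 128) = 2304 MB, and DT_DISKLESS/DT_FILE
  # need none (None).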

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, _TBool),
    ("wait_for_sync", True, _TBool),
    ("ip_check", True, _TBool),
    ("name_check", True, _TBool),
    ("disks", _NoDefault, _TListOf(_TDict)),
    ("nics", _NoDefault, _TListOf(_TDict)),
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ("osparams", _EmptyDict, _TDict),
    ("no_install", None, _TMaybeBool),
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ("source_handshake", None, _TOr(_TList, _TNone)),
    ("source_x509_ca", None, _TOr(_TList, _TNone)),
    ("source_instance_name", None, _TMaybeString),
    ("src_node", None, _TMaybeString),
    ("src_path", None, _TMaybeString),
    ("pnode", None, _TMaybeString),
    ("snode", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("hypervisor", None, _TMaybeString),
    ("disk_template", _NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, _TBool),
    ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, _TMaybeString),
    ("dry_run", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.HostInfo.NormalizeName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
      self.source_instance_name = netutils.GetHostInfo(norm_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IsValidIP4(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
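      # Illustrative example (made-up names): an adopted LV ("xenvg", "mydata")
      # keeps its current name in the temporary disk's logical_id, while
      # rename_to holds the freshly generated name from _GenerateDiskTemplate,
      # so the rename call below moves the adopted volume onto the instance's
      # own naming scheme.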
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
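    # Nothing is executed here: as the class docstring notes, the caller is
    # expected to run the returned command line on the master node to actually
    # open the console session.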


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ("early_release", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
7538

    
7539
  @staticmethod
7540
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7541
    """Compute a new secondary node using an IAllocator.
7542

7543
    """
7544
    ial = IAllocator(lu.cfg, lu.rpc,
7545
                     mode=constants.IALLOCATOR_MODE_RELOC,
7546
                     name=instance_name,
7547
                     relocate_from=relocate_from)
7548

    
7549
    ial.Run(iallocator_name)
7550

    
7551
    if not ial.success:
7552
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7553
                                 " %s" % (iallocator_name, ial.info),
7554
                                 errors.ECODE_NORES)
7555

    
7556
    if len(ial.result) != ial.required_nodes:
7557
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7558
                                 " of nodes (%s), required %s" %
7559
                                 (iallocator_name,
7560
                                  len(ial.result), ial.required_nodes),
7561
                                 errors.ECODE_FAULT)
7562

    
7563
    remote_node_name = ial.result[0]
7564

    
7565
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7566
               instance_name, remote_node_name)
7567

    
7568
    return remote_node_name
7569

    
7570
  def _FindFaultyDisks(self, node_name):
7571
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7572
                                    node_name, True)
7573

    
7574
  def CheckPrereq(self):
7575
    """Check prerequisites.
7576

7577
    This checks that the instance is in the cluster.
7578

7579
    """
7580
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7581
    assert instance is not None, \
7582
      "Cannot retrieve locked instance %s" % self.instance_name
7583

    
7584
    if instance.disk_template != constants.DT_DRBD8:
7585
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7586
                                 " instances", errors.ECODE_INVAL)
7587

    
7588
    if len(instance.secondary_nodes) != 1:
7589
      raise errors.OpPrereqError("The instance has a strange layout,"
7590
                                 " expected one secondary but found %d" %
7591
                                 len(instance.secondary_nodes),
7592
                                 errors.ECODE_FAULT)
7593

    
7594
    if not self.delay_iallocator:
7595
      self._CheckPrereq2()
7596

    
7597
  def _CheckPrereq2(self):
7598
    """Check prerequisites, second part.
7599

7600
    This function should always be part of CheckPrereq. It was separated and is
7601
    now called from Exec because during node evacuation iallocator was only
7602
    called with an unmodified cluster model, not taking planned changes into
7603
    account.
7604

7605
    """
7606
    instance = self.instance
7607
    secondary_node = instance.secondary_nodes[0]
7608

    
7609
    if self.iallocator_name is None:
7610
      remote_node = self.remote_node
7611
    else:
7612
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7613
                                       instance.name, instance.secondary_nodes)
7614

    
7615
    if remote_node is not None:
7616
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7617
      assert self.remote_node_info is not None, \
7618
        "Cannot retrieve locked node %s" % remote_node
7619
    else:
7620
      self.remote_node_info = None
7621

    
7622
    if remote_node == self.instance.primary_node:
7623
      raise errors.OpPrereqError("The specified node is the primary node of"
7624
                                 " the instance.", errors.ECODE_INVAL)
7625

    
7626
    if remote_node == secondary_node:
7627
      raise errors.OpPrereqError("The specified node is already the"
7628
                                 " secondary node of the instance.",
7629
                                 errors.ECODE_INVAL)
7630

    
7631
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7632
                                    constants.REPLACE_DISK_CHG):
7633
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7634
                                 errors.ECODE_INVAL)
7635

    
7636
    if self.mode == constants.REPLACE_DISK_AUTO:
7637
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7638
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7639

    
7640
      if faulty_primary and faulty_secondary:
7641
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7642
                                   " one node and can not be repaired"
7643
                                   " automatically" % self.instance_name,
7644
                                   errors.ECODE_STATE)
7645

    
7646
      if faulty_primary:
7647
        self.disks = faulty_primary
7648
        self.target_node = instance.primary_node
7649
        self.other_node = secondary_node
7650
        check_nodes = [self.target_node, self.other_node]
7651
      elif faulty_secondary:
7652
        self.disks = faulty_secondary
7653
        self.target_node = secondary_node
7654
        self.other_node = instance.primary_node
7655
        check_nodes = [self.target_node, self.other_node]
7656
      else:
7657
        self.disks = []
7658
        check_nodes = []
7659

    
7660
    else:
7661
      # Non-automatic modes
7662
      if self.mode == constants.REPLACE_DISK_PRI:
7663
        self.target_node = instance.primary_node
7664
        self.other_node = secondary_node
7665
        check_nodes = [self.target_node, self.other_node]
7666

    
7667
      elif self.mode == constants.REPLACE_DISK_SEC:
7668
        self.target_node = secondary_node
7669
        self.other_node = instance.primary_node
7670
        check_nodes = [self.target_node, self.other_node]
7671

    
7672
      elif self.mode == constants.REPLACE_DISK_CHG:
7673
        self.new_node = remote_node
7674
        self.other_node = instance.primary_node
7675
        self.target_node = secondary_node
7676
        check_nodes = [self.new_node, self.other_node]
7677

    
7678
        _CheckNodeNotDrained(self.lu, remote_node)
7679

    
7680
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7681
        assert old_node_info is not None
7682
        if old_node_info.offline and not self.early_release:
7683
          # doesn't make sense to delay the release
7684
          self.early_release = True
7685
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7686
                          " early-release mode", secondary_node)
7687

    
7688
      else:
7689
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7690
                                     self.mode)
7691

    
7692
      # If not specified all disks should be replaced
7693
      if not self.disks:
7694
        self.disks = range(len(self.instance.disks))
7695

    
7696
    for node in check_nodes:
7697
      _CheckNodeOnline(self.lu, node)
7698

    
7699
    # Check whether disks are valid
7700
    for disk_idx in self.disks:
7701
      instance.FindDisk(disk_idx)
7702

    
7703
    # Get secondary node IP addresses
7704
    node_2nd_ip = {}
7705

    
7706
    for node_name in [self.target_node, self.other_node, self.new_node]:
7707
      if node_name is not None:
7708
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7709

    
7710
    self.node_secondary_ip = node_2nd_ip
7711

    
7712
  def Exec(self, feedback_fn):
7713
    """Execute disk replacement.
7714

7715
    This dispatches the disk replacement to the appropriate handler.
7716

7717
    """
7718
    if self.delay_iallocator:
7719
      self._CheckPrereq2()
7720

    
7721
    if not self.disks:
7722
      feedback_fn("No disks need replacement")
7723
      return
7724

    
7725
    feedback_fn("Replacing disk(s) %s for %s" %
7726
                (utils.CommaJoin(self.disks), self.instance.name))
7727

    
7728
    activate_disks = (not self.instance.admin_up)
7729

    
7730
    # Activate the instance disks if we're replacing them on a down instance
7731
    if activate_disks:
7732
      _StartInstanceDisks(self.lu, self.instance, True)
7733

    
7734
    try:
7735
      # Should we replace the secondary node?
7736
      if self.new_node is not None:
7737
        fn = self._ExecDrbd8Secondary
7738
      else:
7739
        fn = self._ExecDrbd8DiskOnly
7740

    
7741
      return fn(feedback_fn)
7742

    
7743
    finally:
7744
      # Deactivate the instance disks if we're replacing them on a
7745
      # down instance
7746
      if activate_disks:
7747
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
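      # As the comment above explains, new_alone_id carries no port so the
      # device on the new node first comes up without networking; new_net_id
      # (which includes o_port) is what gets stored below and used when the
      # primary re-attaches to the network later in this step.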

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("ignore_consistency", False, _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result
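    # In the remote_node case the result is the list of [instance_name,
    # new_secondary] pairs built above; otherwise it is whatever the
    # iallocator returned for the multi-evacuation request.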
8271

    
8272

    
8273
class LUGrowDisk(LogicalUnit):
8274
  """Grow a disk of an instance.
8275

8276
  """
8277
  HPATH = "disk-grow"
8278
  HTYPE = constants.HTYPE_INSTANCE
8279
  _OP_PARAMS = [
8280
    _PInstanceName,
8281
    ("disk", _NoDefault, _TInt),
8282
    ("amount", _NoDefault, _TInt),
8283
    ("wait_for_sync", True, _TBool),
8284
    ]
8285
  REQ_BGL = False
8286

    
8287
  def ExpandNames(self):
8288
    self._ExpandAndLockInstance()
8289
    self.needed_locks[locking.LEVEL_NODE] = []
8290
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8291

    
8292
  def DeclareLocks(self, level):
8293
    if level == locking.LEVEL_NODE:
8294
      self._LockInstancesNodes()
8295

    
8296
  def BuildHooksEnv(self):
8297
    """Build hooks env.
8298

8299
    This runs on the master, the primary and all the secondaries.
8300

8301
    """
8302
    env = {
8303
      "DISK": self.op.disk,
8304
      "AMOUNT": self.op.amount,
8305
      }
8306
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8307
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8308
    return env, nl, nl
8309

    
8310
  def CheckPrereq(self):
8311
    """Check prerequisites.
8312

8313
    This checks that the instance is in the cluster.
8314

8315
    """
8316
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8317
    assert instance is not None, \
8318
      "Cannot retrieve locked instance %s" % self.op.instance_name
8319
    nodenames = list(instance.all_nodes)
8320
    for node in nodenames:
8321
      _CheckNodeOnline(self, node)
8322

    
8323
    self.instance = instance
8324

    
8325
    if instance.disk_template not in constants.DTS_GROWABLE:
8326
      raise errors.OpPrereqError("Instance's disk layout does not support"
8327
                                 " growing.", errors.ECODE_INVAL)
8328

    
8329
    self.disk = instance.FindDisk(self.op.disk)
8330

    
8331
    if instance.disk_template != constants.DT_FILE:
8332
      # TODO: check the free disk space for file-based disks, once that
8333
      # feature is supported
8334
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8335

    
8336
  def Exec(self, feedback_fn):
8337
    """Execute disk grow.
8338

8339
    """
8340
    instance = self.instance
8341
    disk = self.disk
8342

    
8343
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8344
    if not disks_ok:
8345
      raise errors.OpExecError("Cannot activate block device to grow")
8346

    
8347
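    # Request the grow on every node the instance uses, so that (e.g. for
    # DRBD) both the primary and the secondary see the new size.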
    for node in instance.all_nodes:
8348
      self.cfg.SetDiskID(disk, node)
8349
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8350
      result.Raise("Grow request failed to node %s" % node)
8351

    
8352
      # TODO: Rewrite code to work properly
8353
      # DRBD goes into sync mode for a short amount of time after executing the
8354
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8355
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8356
      # time is a work-around.
8357
      time.sleep(5)
8358

    
8359
    disk.RecordGrow(self.op.amount)
8360
    self.cfg.Update(instance, feedback_fn)
8361
    if self.op.wait_for_sync:
8362
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8363
      if disk_abort:
8364
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8365
                             " status.\nPlease check the instance.")
8366
      if not instance.admin_up:
8367
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8368
    elif not instance.admin_up:
8369
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8370
                           " not supposed to be running because no wait for"
8371
                           " sync mode was requested.")
8372

    
8373

    
8374
class LUQueryInstanceData(NoHooksLU):
8375
  """Query runtime instance data.
8376

8377
  """
8378
  _OP_PARAMS = [
8379
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8380
    ("static", False, _TBool),
8381
    ]
8382
  REQ_BGL = False
8383

    
8384
  def ExpandNames(self):
8385
    self.needed_locks = {}
8386
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8387

    
8388
    if self.op.instances:
8389
      self.wanted_names = []
8390
      for name in self.op.instances:
8391
        full_name = _ExpandInstanceName(self.cfg, name)
8392
        self.wanted_names.append(full_name)
8393
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8394
    else:
8395
      self.wanted_names = None
8396
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8397

    
8398
    self.needed_locks[locking.LEVEL_NODE] = []
8399
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8400

    
8401
  def DeclareLocks(self, level):
8402
    if level == locking.LEVEL_NODE:
8403
      self._LockInstancesNodes()
8404

    
8405
  def CheckPrereq(self):
8406
    """Check prerequisites.
8407

8408
    This only checks the optional instance list against the existing names.
8409

8410
    """
8411
    if self.wanted_names is None:
8412
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8413

    
8414
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8415
                             in self.wanted_names]
8416

    
8417
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8418
    """Returns the status of a block device
8419

8420
    """
8421
    if self.op.static or not node:
8422
      return None
8423

    
8424
    self.cfg.SetDiskID(dev, node)
8425

    
8426
    result = self.rpc.call_blockdev_find(node, dev)
8427
    if result.offline:
8428
      return None
8429

    
8430
    result.Raise("Can't compute disk status for %s" % instance_name)
8431

    
8432
    status = result.payload
8433
    if status is None:
8434
      return None
8435

    
8436
    return (status.dev_path, status.major, status.minor,
8437
            status.sync_percent, status.estimated_time,
8438
            status.is_degraded, status.ldisk_status)
8439

    
8440
  def _ComputeDiskStatus(self, instance, snode, dev):
8441
    """Compute block device status.
8442

8443
    """
8444
    if dev.dev_type in constants.LDS_DRBD:
8445
      # we change the snode then (otherwise we use the one passed in)
8446
      if dev.logical_id[0] == instance.primary_node:
8447
        snode = dev.logical_id[1]
8448
      else:
8449
        snode = dev.logical_id[0]
8450

    
8451
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8452
                                              instance.name, dev)
8453
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8454

    
8455
    if dev.children:
8456
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8457
                      for child in dev.children]
8458
    else:
8459
      dev_children = []
8460

    
8461
    data = {
8462
      "iv_name": dev.iv_name,
8463
      "dev_type": dev.dev_type,
8464
      "logical_id": dev.logical_id,
8465
      "physical_id": dev.physical_id,
8466
      "pstatus": dev_pstatus,
8467
      "sstatus": dev_sstatus,
8468
      "children": dev_children,
8469
      "mode": dev.mode,
8470
      "size": dev.size,
8471
      }
8472

    
8473
    return data
8474

    
8475
  def Exec(self, feedback_fn):
8476
    """Gather and return data"""
8477
    result = {}
8478

    
8479
    cluster = self.cfg.GetClusterInfo()
8480

    
8481
    for instance in self.wanted_instances:
8482
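      # For non-static queries ask the primary node whether the instance
      # is actually running; static queries use only configuration data.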
      if not self.op.static:
8483
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8484
                                                  instance.name,
8485
                                                  instance.hypervisor)
8486
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8487
        remote_info = remote_info.payload
8488
        if remote_info and "state" in remote_info:
8489
          remote_state = "up"
8490
        else:
8491
          remote_state = "down"
8492
      else:
8493
        remote_state = None
8494
      if instance.admin_up:
8495
        config_state = "up"
8496
      else:
8497
        config_state = "down"
8498

    
8499
      disks = [self._ComputeDiskStatus(instance, None, device)
8500
               for device in instance.disks]
8501

    
8502
      idict = {
8503
        "name": instance.name,
8504
        "config_state": config_state,
8505
        "run_state": remote_state,
8506
        "pnode": instance.primary_node,
8507
        "snodes": instance.secondary_nodes,
8508
        "os": instance.os,
8509
        # this happens to be the same format used for hooks
8510
        "nics": _NICListToTuple(self, instance.nics),
8511
        "disk_template": instance.disk_template,
8512
        "disks": disks,
8513
        "hypervisor": instance.hypervisor,
8514
        "network_port": instance.network_port,
8515
        "hv_instance": instance.hvparams,
8516
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8517
        "be_instance": instance.beparams,
8518
        "be_actual": cluster.FillBE(instance),
8519
        "os_instance": instance.osparams,
8520
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8521
        "serial_no": instance.serial_no,
8522
        "mtime": instance.mtime,
8523
        "ctime": instance.ctime,
8524
        "uuid": instance.uuid,
8525
        }
8526

    
8527
      result[instance.name] = idict
8528

    
8529
    return result
8530

    
8531

    
8532
class LUSetInstanceParams(LogicalUnit):
8533
  """Modifies an instances's parameters.
8534

8535
  """
8536
  HPATH = "instance-modify"
8537
  HTYPE = constants.HTYPE_INSTANCE
8538
  _OP_PARAMS = [
8539
    _PInstanceName,
8540
    ("nics", _EmptyList, _TList),
8541
    ("disks", _EmptyList, _TList),
8542
    ("beparams", _EmptyDict, _TDict),
8543
    ("hvparams", _EmptyDict, _TDict),
8544
    ("disk_template", None, _TMaybeString),
8545
    ("remote_node", None, _TMaybeString),
8546
    ("os_name", None, _TMaybeString),
8547
    ("force_variant", False, _TBool),
8548
    ("osparams", None, _TOr(_TDict, _TNone)),
8549
    _PForce,
8550
    ]
8551
  REQ_BGL = False
8552

    
8553
  def CheckArguments(self):
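    """Checks the static validity of the requested modifications.

    This verifies that at least one change was submitted and performs the
    per-entry validation of the disk and NIC modification lists.

    """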
8554
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8555
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.osparams):
8556
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8557

    
8558
    if self.op.hvparams:
8559
      _CheckGlobalHvParams(self.op.hvparams)
8560

    
8561
    # Disk validation
8562
    disk_addremove = 0
8563
    for disk_op, disk_dict in self.op.disks:
8564
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8565
      if disk_op == constants.DDM_REMOVE:
8566
        disk_addremove += 1
8567
        continue
8568
      elif disk_op == constants.DDM_ADD:
8569
        disk_addremove += 1
8570
      else:
8571
        if not isinstance(disk_op, int):
8572
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8573
        if not isinstance(disk_dict, dict):
8574
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8575
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8576

    
8577
      if disk_op == constants.DDM_ADD:
8578
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8579
        if mode not in constants.DISK_ACCESS_SET:
8580
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8581
                                     errors.ECODE_INVAL)
8582
        size = disk_dict.get('size', None)
8583
        if size is None:
8584
          raise errors.OpPrereqError("Required disk parameter size missing",
8585
                                     errors.ECODE_INVAL)
8586
        try:
8587
          size = int(size)
8588
        except (TypeError, ValueError), err:
8589
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8590
                                     str(err), errors.ECODE_INVAL)
8591
        disk_dict['size'] = size
8592
      else:
8593
        # modification of disk
8594
        if 'size' in disk_dict:
8595
          raise errors.OpPrereqError("Disk size change not possible, use"
8596
                                     " grow-disk", errors.ECODE_INVAL)
8597

    
8598
    if disk_addremove > 1:
8599
      raise errors.OpPrereqError("Only one disk add or remove operation"
8600
                                 " supported at a time", errors.ECODE_INVAL)
8601

    
8602
    if self.op.disks and self.op.disk_template is not None:
8603
      raise errors.OpPrereqError("Disk template conversion and other disk"
8604
                                 " changes not supported at the same time",
8605
                                 errors.ECODE_INVAL)
8606

    
8607
    if self.op.disk_template:
8608
      _CheckDiskTemplate(self.op.disk_template)
8609
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8610
          self.op.remote_node is None):
8611
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8612
                                   " one requires specifying a secondary node",
8613
                                   errors.ECODE_INVAL)
8614

    
8615
    # NIC validation
8616
    nic_addremove = 0
8617
    for nic_op, nic_dict in self.op.nics:
8618
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8619
      if nic_op == constants.DDM_REMOVE:
8620
        nic_addremove += 1
8621
        continue
8622
      elif nic_op == constants.DDM_ADD:
8623
        nic_addremove += 1
8624
      else:
8625
        if not isinstance(nic_op, int):
8626
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8627
        if not isinstance(nic_dict, dict):
8628
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8629
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8630

    
8631
      # nic_dict should be a dict
8632
      nic_ip = nic_dict.get('ip', None)
8633
      if nic_ip is not None:
8634
        if nic_ip.lower() == constants.VALUE_NONE:
8635
          nic_dict['ip'] = None
8636
        else:
8637
          if not netutils.IsValidIP4(nic_ip):
8638
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8639
                                       errors.ECODE_INVAL)
8640

    
8641
      nic_bridge = nic_dict.get('bridge', None)
8642
      nic_link = nic_dict.get('link', None)
8643
      if nic_bridge and nic_link:
8644
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8645
                                   " at the same time", errors.ECODE_INVAL)
8646
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8647
        nic_dict['bridge'] = None
8648
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8649
        nic_dict['link'] = None
8650

    
8651
      if nic_op == constants.DDM_ADD:
8652
        nic_mac = nic_dict.get('mac', None)
8653
        if nic_mac is None:
8654
          nic_dict['mac'] = constants.VALUE_AUTO
8655

    
8656
      if 'mac' in nic_dict:
8657
        nic_mac = nic_dict['mac']
8658
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8659
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8660

    
8661
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8662
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8663
                                     " modifying an existing nic",
8664
                                     errors.ECODE_INVAL)
8665

    
8666
    if nic_addremove > 1:
8667
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8668
                                 " supported at a time", errors.ECODE_INVAL)
8669

    
8670
  def ExpandNames(self):
8671
    self._ExpandAndLockInstance()
8672
    self.needed_locks[locking.LEVEL_NODE] = []
8673
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8674

    
8675
  def DeclareLocks(self, level):
8676
    if level == locking.LEVEL_NODE:
8677
      self._LockInstancesNodes()
8678
      if self.op.disk_template and self.op.remote_node:
8679
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8680
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8681

    
8682
  def BuildHooksEnv(self):
8683
    """Build hooks env.
8684

8685
    This runs on the master, primary and secondaries.
8686

8687
    """
8688
    args = dict()
8689
    if constants.BE_MEMORY in self.be_new:
8690
      args['memory'] = self.be_new[constants.BE_MEMORY]
8691
    if constants.BE_VCPUS in self.be_new:
8692
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8693
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8694
    # information at all.
8695
    if self.op.nics:
8696
      args['nics'] = []
8697
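      # self.op.nics is a list of (index or DDM_*, changes) pairs; turn it
      # into a dict so the per-index overrides are easy to look up below.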
      nic_override = dict(self.op.nics)
8698
      for idx, nic in enumerate(self.instance.nics):
8699
        if idx in nic_override:
8700
          this_nic_override = nic_override[idx]
8701
        else:
8702
          this_nic_override = {}
8703
        if 'ip' in this_nic_override:
8704
          ip = this_nic_override['ip']
8705
        else:
8706
          ip = nic.ip
8707
        if 'mac' in this_nic_override:
8708
          mac = this_nic_override['mac']
8709
        else:
8710
          mac = nic.mac
8711
        if idx in self.nic_pnew:
8712
          nicparams = self.nic_pnew[idx]
8713
        else:
8714
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8715
        mode = nicparams[constants.NIC_MODE]
8716
        link = nicparams[constants.NIC_LINK]
8717
        args['nics'].append((ip, mac, mode, link))
8718
      if constants.DDM_ADD in nic_override:
8719
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8720
        mac = nic_override[constants.DDM_ADD]['mac']
8721
        nicparams = self.nic_pnew[constants.DDM_ADD]
8722
        mode = nicparams[constants.NIC_MODE]
8723
        link = nicparams[constants.NIC_LINK]
8724
        args['nics'].append((ip, mac, mode, link))
8725
      elif constants.DDM_REMOVE in nic_override:
8726
        del args['nics'][-1]
8727

    
8728
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8729
    if self.op.disk_template:
8730
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8731
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8732
    return env, nl, nl
8733

    
8734
  def CheckPrereq(self):
8735
    """Check prerequisites.
8736

8737
    This only checks the instance list against the existing names.
8738

8739
    """
8740
    # checking the new params on the primary/secondary nodes
8741

    
8742
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8743
    cluster = self.cluster = self.cfg.GetClusterInfo()
8744
    assert self.instance is not None, \
8745
      "Cannot retrieve locked instance %s" % self.op.instance_name
8746
    pnode = instance.primary_node
8747
    nodelist = list(instance.all_nodes)
8748

    
8749
    # OS change
8750
    if self.op.os_name and not self.op.force:
8751
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8752
                      self.op.force_variant)
8753
      instance_os = self.op.os_name
8754
    else:
8755
      instance_os = instance.os
8756

    
8757
    if self.op.disk_template:
8758
      if instance.disk_template == self.op.disk_template:
8759
        raise errors.OpPrereqError("Instance already has disk template %s" %
8760
                                   instance.disk_template, errors.ECODE_INVAL)
8761

    
8762
      if (instance.disk_template,
8763
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8764
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8765
                                   " %s to %s" % (instance.disk_template,
8766
                                                  self.op.disk_template),
8767
                                   errors.ECODE_INVAL)
8768
      _CheckInstanceDown(self, instance, "cannot change disk template")
8769
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8770
        _CheckNodeOnline(self, self.op.remote_node)
8771
        _CheckNodeNotDrained(self, self.op.remote_node)
8772
        disks = [{"size": d.size} for d in instance.disks]
8773
        required = _ComputeDiskSize(self.op.disk_template, disks)
8774
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8775

    
8776
    # hvparams processing
8777
    if self.op.hvparams:
8778
      hv_type = instance.hypervisor
8779
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8780
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8781
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8782

    
8783
      # local check
8784
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8785
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8786
      self.hv_new = hv_new # the new actual values
8787
      self.hv_inst = i_hvdict # the new dict (without defaults)
8788
    else:
8789
      self.hv_new = self.hv_inst = {}
8790

    
8791
    # beparams processing
8792
    if self.op.beparams:
8793
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8794
                                   use_none=True)
8795
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8796
      be_new = cluster.SimpleFillBE(i_bedict)
8797
      self.be_new = be_new # the new actual values
8798
      self.be_inst = i_bedict # the new dict (without defaults)
8799
    else:
8800
      self.be_new = self.be_inst = {}
8801

    
8802
    # osparams processing
8803
    if self.op.osparams:
8804
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8805
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8806
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8807
      self.os_inst = i_osdict # the new dict (without defaults)
8808
    else:
8809
      self.os_new = self.os_inst = {}
8810

    
8811
    self.warn = []
8812

    
8813
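    # When the memory is changed and force was not given, check that the new
    # value still fits into the free memory of the primary node (and of the
    # secondaries, if auto_balance is set).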
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8814
      mem_check_list = [pnode]
8815
      if be_new[constants.BE_AUTO_BALANCE]:
8816
        # either we changed auto_balance to yes or it was from before
8817
        mem_check_list.extend(instance.secondary_nodes)
8818
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8819
                                                  instance.hypervisor)
8820
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8821
                                         instance.hypervisor)
8822
      pninfo = nodeinfo[pnode]
8823
      msg = pninfo.fail_msg
8824
      if msg:
8825
        # Assume the primary node is unreachable and go ahead
8826
        self.warn.append("Can't get info from primary node %s: %s" %
8827
                         (pnode, msg))
8828
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8829
        self.warn.append("Node data from primary node %s doesn't contain"
8830
                         " free memory information" % pnode)
8831
      elif instance_info.fail_msg:
8832
        self.warn.append("Can't get instance runtime information: %s" %
8833
                        instance_info.fail_msg)
8834
      else:
8835
        if instance_info.payload:
8836
          current_mem = int(instance_info.payload['memory'])
8837
        else:
8838
          # Assume instance not running
8839
          # (there is a slight race condition here, but it's not very probable,
8840
          # and we have no other way to check)
8841
          current_mem = 0
8842
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8843
                    pninfo.payload['memory_free'])
8844
        if miss_mem > 0:
8845
          raise errors.OpPrereqError("This change will prevent the instance"
8846
                                     " from starting, due to %d MB of memory"
8847
                                     " missing on its primary node" % miss_mem,
8848
                                     errors.ECODE_NORES)
8849

    
8850
      if be_new[constants.BE_AUTO_BALANCE]:
8851
        for node, nres in nodeinfo.items():
8852
          if node not in instance.secondary_nodes:
8853
            continue
8854
          msg = nres.fail_msg
8855
          if msg:
8856
            self.warn.append("Can't get info from secondary node %s: %s" %
8857
                             (node, msg))
8858
          elif not isinstance(nres.payload.get('memory_free', None), int):
8859
            self.warn.append("Secondary node %s didn't return free"
8860
                             " memory information" % node)
8861
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8862
            self.warn.append("Not enough memory to failover instance to"
8863
                             " secondary node %s" % node)
8864

    
8865
    # NIC processing
8866
    self.nic_pnew = {}
8867
    self.nic_pinst = {}
8868
    for nic_op, nic_dict in self.op.nics:
8869
      if nic_op == constants.DDM_REMOVE:
8870
        if not instance.nics:
8871
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8872
                                     errors.ECODE_INVAL)
8873
        continue
8874
      if nic_op != constants.DDM_ADD:
8875
        # an existing nic
8876
        if not instance.nics:
8877
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8878
                                     " no NICs" % nic_op,
8879
                                     errors.ECODE_INVAL)
8880
        if nic_op < 0 or nic_op >= len(instance.nics):
8881
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8882
                                     " are 0 to %d" %
8883
                                     (nic_op, len(instance.nics) - 1),
8884
                                     errors.ECODE_INVAL)
8885
        old_nic_params = instance.nics[nic_op].nicparams
8886
        old_nic_ip = instance.nics[nic_op].ip
8887
      else:
8888
        old_nic_params = {}
8889
        old_nic_ip = None
8890

    
8891
      update_params_dict = dict([(key, nic_dict[key])
8892
                                 for key in constants.NICS_PARAMETERS
8893
                                 if key in nic_dict])
8894

    
8895
      if 'bridge' in nic_dict:
8896
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8897

    
8898
      new_nic_params = _GetUpdatedParams(old_nic_params,
8899
                                         update_params_dict)
8900
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8901
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8902
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8903
      self.nic_pinst[nic_op] = new_nic_params
8904
      self.nic_pnew[nic_op] = new_filled_nic_params
8905
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8906

    
8907
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8908
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8909
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8910
        if msg:
8911
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8912
          if self.op.force:
8913
            self.warn.append(msg)
8914
          else:
8915
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8916
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8917
        if 'ip' in nic_dict:
8918
          nic_ip = nic_dict['ip']
8919
        else:
8920
          nic_ip = old_nic_ip
8921
        if nic_ip is None:
8922
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8923
                                     ' on a routed nic', errors.ECODE_INVAL)
8924
      if 'mac' in nic_dict:
8925
        nic_mac = nic_dict['mac']
8926
        if nic_mac is None:
8927
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8928
                                     errors.ECODE_INVAL)
8929
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8930
          # otherwise generate the mac
8931
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8932
        else:
8933
          # or validate/reserve the current one
8934
          try:
8935
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8936
          except errors.ReservationError:
8937
            raise errors.OpPrereqError("MAC address %s already in use"
8938
                                       " in cluster" % nic_mac,
8939
                                       errors.ECODE_NOTUNIQUE)
8940

    
8941
    # DISK processing
8942
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8943
      raise errors.OpPrereqError("Disk operations not supported for"
8944
                                 " diskless instances",
8945
                                 errors.ECODE_INVAL)
8946
    for disk_op, _ in self.op.disks:
8947
      if disk_op == constants.DDM_REMOVE:
8948
        if len(instance.disks) == 1:
8949
          raise errors.OpPrereqError("Cannot remove the last disk of"
8950
                                     " an instance", errors.ECODE_INVAL)
8951
        _CheckInstanceDown(self, instance, "cannot remove disks")
8952

    
8953
      if (disk_op == constants.DDM_ADD and
8954
          len(instance.disks) >= constants.MAX_DISKS):
8955
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8956
                                   " add more" % constants.MAX_DISKS,
8957
                                   errors.ECODE_STATE)
8958
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8959
        # an existing disk
8960
        if disk_op < 0 or disk_op >= len(instance.disks):
8961
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8962
                                     " are 0 to %d" %
8963
                                     (disk_op, len(instance.disks) - 1),
8964
                                     errors.ECODE_INVAL)
8965

    
8966
    return
8967

    
8968
  def _ConvertPlainToDrbd(self, feedback_fn):
8969
    """Converts an instance from plain to drbd.
8970

8971
    """
8972
    feedback_fn("Converting template to drbd")
8973
    instance = self.instance
8974
    pnode = instance.primary_node
8975
    snode = self.op.remote_node
8976

    
8977
    # create a fake disk info for _GenerateDiskTemplate
8978
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8979
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8980
                                      instance.name, pnode, [snode],
8981
                                      disk_info, None, None, 0)
8982
    info = _GetInstanceInfoText(instance)
8983
    feedback_fn("Creating aditional volumes...")
8984
    # first, create the missing data and meta devices
8985
    for disk in new_disks:
8986
      # unfortunately this is... not too nice
8987
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8988
                            info, True)
8989
      for child in disk.children:
8990
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8991
    # at this stage, all new LVs have been created, we can rename the
8992
    # old ones
8993
    feedback_fn("Renaming original volumes...")
8994
    rename_list = [(o, n.children[0].logical_id)
8995
                   for (o, n) in zip(instance.disks, new_disks)]
8996
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8997
    result.Raise("Failed to rename original LVs")
8998

    
8999
    feedback_fn("Initializing DRBD devices...")
9000
    # all child devices are in place, we can now create the DRBD devices
9001
    for disk in new_disks:
9002
      for node in [pnode, snode]:
9003
        f_create = node == pnode
9004
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9005

    
9006
    # at this point, the instance has been modified
9007
    instance.disk_template = constants.DT_DRBD8
9008
    instance.disks = new_disks
9009
    self.cfg.Update(instance, feedback_fn)
9010

    
9011
    # disks are created, waiting for sync
9012
    disk_abort = not _WaitForSync(self, instance)
9013
    if disk_abort:
9014
      raise errors.OpExecError("There are some degraded disks for"
9015
                               " this instance, please cleanup manually")
9016

    
9017
  def _ConvertDrbdToPlain(self, feedback_fn):
9018
    """Converts an instance from drbd to plain.
9019

9020
    """
9021
    instance = self.instance
9022
    assert len(instance.secondary_nodes) == 1
9023
    pnode = instance.primary_node
9024
    snode = instance.secondary_nodes[0]
9025
    feedback_fn("Converting template to plain")
9026

    
9027
    old_disks = instance.disks
9028
    new_disks = [d.children[0] for d in old_disks]
9029

    
9030
    # copy over size and mode
9031
    for parent, child in zip(old_disks, new_disks):
9032
      child.size = parent.size
9033
      child.mode = parent.mode
9034

    
9035
    # update instance structure
9036
    instance.disks = new_disks
9037
    instance.disk_template = constants.DT_PLAIN
9038
    self.cfg.Update(instance, feedback_fn)
9039

    
9040
    feedback_fn("Removing volumes on the secondary node...")
9041
    for disk in old_disks:
9042
      self.cfg.SetDiskID(disk, snode)
9043
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9044
      if msg:
9045
        self.LogWarning("Could not remove block device %s on node %s,"
9046
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9047

    
9048
    feedback_fn("Removing unneeded volumes on the primary node...")
9049
    for idx, disk in enumerate(old_disks):
9050
      meta = disk.children[1]
9051
      self.cfg.SetDiskID(meta, pnode)
9052
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9053
      if msg:
9054
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9055
                        " continuing anyway: %s", idx, pnode, msg)
9056

    
9057

    
9058
  def Exec(self, feedback_fn):
9059
    """Modifies an instance.
9060

9061
    All parameters take effect only at the next restart of the instance.
9062

9063
    """
9064
    # Process here the warnings from CheckPrereq, as we don't have a
9065
    # feedback_fn there.
9066
    for warn in self.warn:
9067
      feedback_fn("WARNING: %s" % warn)
9068

    
9069
    result = []
9070
    instance = self.instance
9071
    # disk changes
9072
    for disk_op, disk_dict in self.op.disks:
9073
      if disk_op == constants.DDM_REMOVE:
9074
        # remove the last disk
9075
        device = instance.disks.pop()
9076
        device_idx = len(instance.disks)
9077
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9078
          self.cfg.SetDiskID(disk, node)
9079
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9080
          if msg:
9081
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9082
                            " continuing anyway", device_idx, node, msg)
9083
        result.append(("disk/%d" % device_idx, "remove"))
9084
      elif disk_op == constants.DDM_ADD:
9085
        # add a new disk
9086
        if instance.disk_template == constants.DT_FILE:
9087
          file_driver, file_path = instance.disks[0].logical_id
9088
          file_path = os.path.dirname(file_path)
9089
        else:
9090
          file_driver = file_path = None
9091
        disk_idx_base = len(instance.disks)
9092
        new_disk = _GenerateDiskTemplate(self,
9093
                                         instance.disk_template,
9094
                                         instance.name, instance.primary_node,
9095
                                         instance.secondary_nodes,
9096
                                         [disk_dict],
9097
                                         file_path,
9098
                                         file_driver,
9099
                                         disk_idx_base)[0]
9100
        instance.disks.append(new_disk)
9101
        info = _GetInstanceInfoText(instance)
9102

    
9103
        logging.info("Creating volume %s for instance %s",
9104
                     new_disk.iv_name, instance.name)
9105
        # Note: this needs to be kept in sync with _CreateDisks
9106
        #HARDCODE
9107
        for node in instance.all_nodes:
9108
          f_create = node == instance.primary_node
9109
          try:
9110
            _CreateBlockDev(self, node, instance, new_disk,
9111
                            f_create, info, f_create)
9112
          except errors.OpExecError, err:
9113
            self.LogWarning("Failed to create volume %s (%s) on"
9114
                            " node %s: %s",
9115
                            new_disk.iv_name, new_disk, node, err)
9116
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9117
                       (new_disk.size, new_disk.mode)))
9118
      else:
9119
        # change a given disk
9120
        instance.disks[disk_op].mode = disk_dict['mode']
9121
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9122

    
9123
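    # Template conversions require the instance disks to be shut down; the
    # actual conversion is dispatched through _DISK_CONVERSIONS.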
    if self.op.disk_template:
9124
      r_shut = _ShutdownInstanceDisks(self, instance)
9125
      if not r_shut:
9126
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9127
                                 " proceed with disk template conversion")
9128
      mode = (instance.disk_template, self.op.disk_template)
9129
      try:
9130
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9131
      except:
9132
        self.cfg.ReleaseDRBDMinors(instance.name)
9133
        raise
9134
      result.append(("disk_template", self.op.disk_template))
9135

    
9136
    # NIC changes
9137
    for nic_op, nic_dict in self.op.nics:
9138
      if nic_op == constants.DDM_REMOVE:
9139
        # remove the last nic
9140
        del instance.nics[-1]
9141
        result.append(("nic.%d" % len(instance.nics), "remove"))
9142
      elif nic_op == constants.DDM_ADD:
9143
        # mac and bridge should be set, by now
9144
        mac = nic_dict['mac']
9145
        ip = nic_dict.get('ip', None)
9146
        nicparams = self.nic_pinst[constants.DDM_ADD]
9147
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9148
        instance.nics.append(new_nic)
9149
        result.append(("nic.%d" % (len(instance.nics) - 1),
9150
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9151
                       (new_nic.mac, new_nic.ip,
9152
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9153
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9154
                       )))
9155
      else:
9156
        for key in 'mac', 'ip':
9157
          if key in nic_dict:
9158
            setattr(instance.nics[nic_op], key, nic_dict[key])
9159
        if nic_op in self.nic_pinst:
9160
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9161
        for key, val in nic_dict.iteritems():
9162
          result.append(("nic.%s/%d" % (key, nic_op), val))
9163

    
9164
    # hvparams changes
9165
    if self.op.hvparams:
9166
      instance.hvparams = self.hv_inst
9167
      for key, val in self.op.hvparams.iteritems():
9168
        result.append(("hv/%s" % key, val))
9169

    
9170
    # beparams changes
9171
    if self.op.beparams:
9172
      instance.beparams = self.be_inst
9173
      for key, val in self.op.beparams.iteritems():
9174
        result.append(("be/%s" % key, val))
9175

    
9176
    # OS change
9177
    if self.op.os_name:
9178
      instance.os = self.op.os_name
9179

    
9180
    # osparams changes
9181
    if self.op.osparams:
9182
      instance.osparams = self.os_inst
9183
      for key, val in self.op.osparams.iteritems():
9184
        result.append(("os/%s" % key, val))
9185

    
9186
    self.cfg.Update(instance, feedback_fn)
9187

    
9188
    return result
9189

    
9190
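  # Supported (old template, new template) pairs and the method implementing
  # each conversion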
  _DISK_CONVERSIONS = {
9191
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9192
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9193
    }
9194

    
9195

    
9196
class LUQueryExports(NoHooksLU):
9197
  """Query the exports list
9198

9199
  """
9200
  _OP_PARAMS = [
9201
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9202
    ("use_locking", False, _TBool),
9203
    ]
9204
  REQ_BGL = False
9205

    
9206
  def ExpandNames(self):
9207
    self.needed_locks = {}
9208
    self.share_locks[locking.LEVEL_NODE] = 1
9209
    if not self.op.nodes:
9210
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9211
    else:
9212
      self.needed_locks[locking.LEVEL_NODE] = \
9213
        _GetWantedNodes(self, self.op.nodes)
9214

    
9215
  def Exec(self, feedback_fn):
9216
    """Compute the list of all the exported system images.
9217

9218
    @rtype: dict
9219
    @return: a dictionary with the structure node->(export-list)
9220
        where export-list is a list of the instances exported on
9221
        that node.
9222

9223
    """
9224
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9225
    rpcresult = self.rpc.call_export_list(self.nodes)
9226
    result = {}
9227
    for node in rpcresult:
9228
      if rpcresult[node].fail_msg:
9229
        result[node] = False
9230
      else:
9231
        result[node] = rpcresult[node].payload
9232

    
9233
    return result
9234

    
9235

    
9236
class LUPrepareExport(NoHooksLU):
9237
  """Prepares an instance for an export and returns useful information.
9238

9239
  """
9240
  _OP_PARAMS = [
9241
    _PInstanceName,
9242
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9243
    ]
9244
  REQ_BGL = False
9245

    
9246
  def ExpandNames(self):
9247
    self._ExpandAndLockInstance()
9248

    
9249
  def CheckPrereq(self):
9250
    """Check prerequisites.
9251

9252
    """
9253
    instance_name = self.op.instance_name
9254

    
9255
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9256
    assert self.instance is not None, \
9257
          "Cannot retrieve locked instance %s" % self.op.instance_name
9258
    _CheckNodeOnline(self, self.instance.primary_node)
9259

    
9260
    self._cds = _GetClusterDomainSecret()
9261

    
9262
  def Exec(self, feedback_fn):
9263
    """Prepares an instance for an export.
9264

9265
    """
9266
    instance = self.instance
9267

    
9268
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9269
      salt = utils.GenerateSecret(8)
9270

    
9271
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9272
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9273
                                              constants.RIE_CERT_VALIDITY)
9274
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9275

    
9276
      (name, cert_pem) = result.payload
9277

    
9278
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9279
                                             cert_pem)
9280

    
9281
      return {
9282
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9283
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9284
                          salt),
9285
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9286
        }
9287

    
9288
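    # Nothing needs to be prepared for local exports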
    return None
9289

    
9290

    
9291
class LUExportInstance(LogicalUnit):
9292
  """Export an instance to an image in the cluster.
9293

9294
  """
9295
  HPATH = "instance-export"
9296
  HTYPE = constants.HTYPE_INSTANCE
9297
  _OP_PARAMS = [
9298
    _PInstanceName,
9299
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9300
    ("shutdown", True, _TBool),
9301
    _PShutdownTimeout,
9302
    ("remove_instance", False, _TBool),
9303
    ("ignore_remove_failures", False, _TBool),
9304
    ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9305
    ("x509_key_name", None, _TOr(_TList, _TNone)),
9306
    ("destination_x509_ca", None, _TMaybeString),
9307
    ]
9308
  REQ_BGL = False
9309

    
9310
  def CheckArguments(self):
9311
    """Check the arguments.
9312

9313
    """
9314
    self.x509_key_name = self.op.x509_key_name
9315
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9316

    
9317
    if self.op.remove_instance and not self.op.shutdown:
9318
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9319
                                 " down before")
9320

    
9321
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9322
      if not self.x509_key_name:
9323
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9324
                                   errors.ECODE_INVAL)
9325

    
9326
      if not self.dest_x509_ca_pem:
9327
        raise errors.OpPrereqError("Missing destination X509 CA",
9328
                                   errors.ECODE_INVAL)
9329

    
9330
  def ExpandNames(self):
9331
    self._ExpandAndLockInstance()
9332

    
9333
    # Lock all nodes for local exports
9334
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9335
      # FIXME: lock only instance primary and destination node
9336
      #
9337
      # Sad but true, for now we have to lock all nodes, as we don't know where
9338
      # the previous export might be, and in this LU we search for it and
9339
      # remove it from its current node. In the future we could fix this by:
9340
      #  - making a tasklet to search (share-lock all), then create the
9341
      #    new one, then one to remove, after
9342
      #  - removing the removal operation altogether
9343
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9344

    
9345
  def DeclareLocks(self, level):
9346
    """Last minute lock declaration."""
9347
    # All nodes are locked anyway, so nothing to do here.
9348

    
9349
  def BuildHooksEnv(self):
9350
    """Build hooks env.
9351

9352
    This will run on the master, primary node and target node.
9353

9354
    """
9355
    env = {
9356
      "EXPORT_MODE": self.op.mode,
9357
      "EXPORT_NODE": self.op.target_node,
9358
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9359
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9360
      # TODO: Generic function for boolean env variables
9361
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9362
      }
9363

    
9364
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9365

    
9366
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9367

    
9368
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9369
      nl.append(self.op.target_node)
9370

    
9371
    return env, nl, nl
9372

    
9373
  def CheckPrereq(self):
9374
    """Check prerequisites.
9375

9376
    This checks that the instance and node names are valid.
9377

9378
    """
9379
    instance_name = self.op.instance_name
9380

    
9381
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9382
    assert self.instance is not None, \
9383
          "Cannot retrieve locked instance %s" % self.op.instance_name
9384
    _CheckNodeOnline(self, self.instance.primary_node)
9385

    
9386
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9387
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9388
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9389
      assert self.dst_node is not None
9390

    
9391
      _CheckNodeOnline(self, self.dst_node.name)
9392
      _CheckNodeNotDrained(self, self.dst_node.name)
9393

    
9394
      self._cds = None
9395
      self.dest_disk_info = None
9396
      self.dest_x509_ca = None
9397

    
9398
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9399
      self.dst_node = None
9400

    
9401
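      # For remote exports, target_node does not hold a node name but one
      # entry of target information per instance disk.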
      if len(self.op.target_node) != len(self.instance.disks):
9402
        raise errors.OpPrereqError(("Received destination information for %s"
9403
                                    " disks, but instance %s has %s disks") %
9404
                                   (len(self.op.target_node), instance_name,
9405
                                    len(self.instance.disks)),
9406
                                   errors.ECODE_INVAL)
9407

    
9408
      cds = _GetClusterDomainSecret()
9409

    
9410
      # Check X509 key name
9411
      try:
9412
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9413
      except (TypeError, ValueError), err:
9414
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9415

    
9416
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9417
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9418
                                   errors.ECODE_INVAL)
9419

    
9420
      # Load and verify CA
9421
      try:
9422
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9423
      except OpenSSL.crypto.Error, err:
9424
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9425
                                   (err, ), errors.ECODE_INVAL)
9426

    
9427
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9428
      if errcode is not None:
9429
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9430
                                   (msg, ), errors.ECODE_INVAL)
9431

    
9432
      self.dest_x509_ca = cert
9433

    
9434
      # Verify target information
9435
      disk_info = []
9436
      for idx, disk_data in enumerate(self.op.target_node):
9437
        try:
9438
          (host, port, magic) = \
9439
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9440
        except errors.GenericError, err:
9441
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9442
                                     (idx, err), errors.ECODE_INVAL)
9443

    
9444
        disk_info.append((host, port, magic))
9445

    
9446
      assert len(disk_info) == len(self.op.target_node)
9447
      self.dest_disk_info = disk_info
9448

    
9449
    else:
9450
      raise errors.ProgrammerError("Unhandled export mode %r" %
9451
                                   self.op.mode)
9452

    
9453
    # instance disk type verification
9454
    # TODO: Implement export support for file-based disks
9455
    for disk in self.instance.disks:
9456
      if disk.dev_type == constants.LD_FILE:
9457
        raise errors.OpPrereqError("Export not supported for instances with"
9458
                                   " file-based disks", errors.ECODE_INVAL)
9459

    
9460
  def _CleanupExports(self, feedback_fn):
9461
    """Removes exports of current instance from all other nodes.
9462

9463
    If an instance in a cluster with nodes A..D was exported to node C, its
9464
    exports will be removed from the nodes A, B and D.
9465

9466
    """
9467
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9468

    
9469
    nodelist = self.cfg.GetNodeList()
9470
    nodelist.remove(self.dst_node.name)
9471

    
9472
    # on one-node clusters nodelist will be empty after the removal
9473
    # if we proceed the backup would be removed because OpQueryExports
9474
    # substitutes an empty list with the full cluster node list.
9475
    iname = self.instance.name
9476
    if nodelist:
9477
      feedback_fn("Removing old exports for instance %s" % iname)
9478
      exportlist = self.rpc.call_export_list(nodelist)
9479
      for node in exportlist:
9480
        if exportlist[node].fail_msg:
9481
          continue
9482
        if iname in exportlist[node].payload:
9483
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9484
          if msg:
9485
            self.LogWarning("Could not remove older export for instance %s"
9486
                            " on node %s: %s", iname, node, msg)
9487

    
9488
  def Exec(self, feedback_fn):
9489
    """Export an instance to an image in the cluster.
9490

9491
    """
9492
    assert self.op.mode in constants.EXPORT_MODES
9493

    
9494
    instance = self.instance
9495
    src_node = instance.primary_node
9496

    
9497
    if self.op.shutdown:
9498
      # shutdown the instance, but not the disks
9499
      feedback_fn("Shutting down instance %s" % instance.name)
9500
      result = self.rpc.call_instance_shutdown(src_node, instance,
9501
                                               self.op.shutdown_timeout)
9502
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9503
      result.Raise("Could not shutdown instance %s on"
9504
                   " node %s" % (instance.name, src_node))
9505

    
9506
    # set the disks ID correctly since call_instance_start needs the
9507
    # correct drbd minor to create the symlinks
9508
    for disk in instance.disks:
9509
      self.cfg.SetDiskID(disk, src_node)
9510

    
9511
    activate_disks = (not instance.admin_up)
9512

    
9513
    if activate_disks:
9514
      # Activate the instance disks if we're exporting a stopped instance
9515
      feedback_fn("Activating disks for %s" % instance.name)
9516
      _StartInstanceDisks(self, instance, None)
9517

    
9518
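    # The outer try/finally makes sure that any disks activated above are
    # deactivated again; the inner one guarantees helper.Cleanup() runs even
    # if the export itself fails.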
    try:
9519
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9520
                                                     instance)
9521

    
9522
      helper.CreateSnapshots()
9523
      try:
9524
        if (self.op.shutdown and instance.admin_up and
9525
            not self.op.remove_instance):
9526
          assert not activate_disks
9527
          feedback_fn("Starting instance %s" % instance.name)
9528
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9529
          msg = result.fail_msg
9530
          if msg:
9531
            feedback_fn("Failed to start instance: %s" % msg)
9532
            _ShutdownInstanceDisks(self, instance)
9533
            raise errors.OpExecError("Could not start instance: %s" % msg)
9534

    
9535
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9536
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9537
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9538
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9539
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9540

    
9541
          (key_name, _, _) = self.x509_key_name
9542

    
9543
          dest_ca_pem = \
9544
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9545
                                            self.dest_x509_ca)
9546

    
9547
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9548
                                                     key_name, dest_ca_pem,
9549
                                                     timeouts)
9550
      finally:
9551
        helper.Cleanup()
9552

    
9553
      # Check for backwards compatibility
9554
      assert len(dresults) == len(instance.disks)
9555
      assert compat.all(isinstance(i, bool) for i in dresults), \
9556
             "Not all results are boolean: %r" % dresults
9557

    
9558
    finally:
9559
      if activate_disks:
9560
        feedback_fn("Deactivating disks for %s" % instance.name)
9561
        _ShutdownInstanceDisks(self, instance)
9562

    
9563
    if not (compat.all(dresults) and fin_resu):
9564
      failures = []
9565
      if not fin_resu:
9566
        failures.append("export finalization")
9567
      if not compat.all(dresults):
9568
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9569
                               if not dsk)
9570
        failures.append("disk export: disk(s) %s" % fdsk)
9571

    
9572
      raise errors.OpExecError("Export failed, errors in %s" %
9573
                               utils.CommaJoin(failures))
9574

    
9575
    # At this point, the export was successful, we can cleanup/finish
9576

    
9577
    # Remove instance if requested
9578
    if self.op.remove_instance:
9579
      feedback_fn("Removing instance %s" % instance.name)
9580
      _RemoveInstance(self, feedback_fn, instance,
9581
                      self.op.ignore_remove_failures)
9582

    
9583
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9584
      self._CleanupExports(feedback_fn)
9585

    
9586
    return fin_resu, dresults
9587

    
9588

    
9589
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
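    # Search the tags of the cluster object, of every instance and of every
    # node, recording each match together with the path of its owner.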
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tags.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

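  # The call counters below let ExpandNames and Exec verify that the LU
  # lifecycle methods were invoked in the expected order (CheckArguments
  # before ExpandNames, ExpandNames before Exec).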
  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode-specific *_KEYS class attributes
      are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

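  # Typical usage, as exercised by the LUs in this module: instantiate with
  # the keyword arguments matching the chosen mode (which already builds the
  # input data), call Run() with the allocator name, then read the parsed
  # response from the success/info/result attributes.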
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

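    # Network-mirrored disk templates need a primary and a secondary node;
    # all other templates only need a primary node.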
    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

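    # The three mandatory keys are also mirrored as attributes on this
    # object (self.success, self.info and self.result).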
    for key in "success", "info", "result":
10308
      if key not in rdict:
10309
        raise errors.OpExecError("Can't parse iallocator results:"
10310
                                 " missing key '%s'" % key)
10311
      setattr(self, key, rdict[key])
10312

    
10313
    if not isinstance(rdict["result"], list):
10314
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10315
                               " is not a list")
10316
    self.out_data = rdict
10317

    
10318

    
10319
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the requested direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result