root / lib / cmdlib.py @ aa29e95f

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have way too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import ht
58
from ganeti import query
59
from ganeti import qlang
60

    
61
import ganeti.masterd.instance # pylint: disable-msg=W0611
62

    
63
# Common opcode attributes
64

    
65
#: output fields for a query operation
66
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
67

    
68

    
69
#: the shutdown timeout
70
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
71
                     ht.TPositiveInt)
72

    
73
#: the force parameter
74
_PForce = ("force", False, ht.TBool)
75

    
76
#: a required instance name (for single-instance LUs)
77
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
78

    
79
#: Whether to ignore offline nodes
80
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
81

    
82
#: a required node name (for single-node LUs)
83
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
84

    
85
#: a required node group name (for single-group LUs)
86
_PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)
87

    
88
#: the migration type (live/non-live)
89
_PMigrationMode = ("mode", None,
90
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
91

    
92
#: the obsolete 'live' mode (boolean)
93
_PMigrationLive = ("live", None, ht.TMaybeBool)
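
# A minimal, purely illustrative sketch (not part of the original file): an
# individual LU combines the common parameter tuples above with its own
# entries; "start_paused" is a hypothetical, LU-specific flag shown only to
# illustrate the (name, default-or-NoDefault, type check) tuple format.
_EXAMPLE_OP_PARAMS = [
  _PInstanceName,
  _PForce,
  ("start_paused", False, ht.TBool),
  ]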
94

    
95

    
96
def _SupportsOob(cfg, node):
97
  """Tells if node supports OOB.
98

99
  @type cfg: L{config.ConfigWriter}
100
  @param cfg: The cluster configuration
101
  @type node: L{objects.Node}
102
  @param node: The node
103
  @return: The OOB script if supported or an empty string otherwise
104

105
  """
106
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
107

    
108

    
109
# End types
110
class LogicalUnit(object):
111
  """Logical Unit base class.
112

113
  Subclasses must follow these rules:
114
    - implement ExpandNames
115
    - implement CheckPrereq (except when tasklets are used)
116
    - implement Exec (except when tasklets are used)
117
    - implement BuildHooksEnv
118
    - redefine HPATH and HTYPE
119
    - optionally redefine their run requirements:
120
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
121

122
  Note that all commands require root permissions.
123

124
  @ivar dry_run_result: the value (if any) that will be returned to the caller
125
      in dry-run mode (signalled by opcode dry_run parameter)
126
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
127
      they should get if not already defined, and types they must match
128

129
  """
130
  HPATH = None
131
  HTYPE = None
132
  _OP_PARAMS = []
133
  REQ_BGL = True
134

    
135
  def __init__(self, processor, op, context, rpc):
136
    """Constructor for LogicalUnit.
137

138
    This needs to be overridden in derived classes in order to check op
139
    validity.
140

141
    """
142
    self.proc = processor
143
    self.op = op
144
    self.cfg = context.cfg
145
    self.context = context
146
    self.rpc = rpc
147
    # Dicts used to declare locking needs to mcpu
148
    self.needed_locks = None
149
    self.acquired_locks = {}
150
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
151
    self.add_locks = {}
152
    self.remove_locks = {}
153
    # Used to force good behavior when calling helper functions
154
    self.recalculate_locks = {}
155
    self.__ssh = None
156
    # logging
157
    self.Log = processor.Log # pylint: disable-msg=C0103
158
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
159
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
160
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
161
    # support for dry-run
162
    self.dry_run_result = None
163
    # support for generic debug attribute
164
    if (not hasattr(self.op, "debug_level") or
165
        not isinstance(self.op.debug_level, int)):
166
      self.op.debug_level = 0
167

    
168
    # Tasklets
169
    self.tasklets = None
170

    
171
    # The new kind-of-type-system
172
    op_id = self.op.OP_ID
173
    for attr_name, aval, test in self._OP_PARAMS:
174
      if not hasattr(op, attr_name):
175
        if aval == ht.NoDefault:
176
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
177
                                     (op_id, attr_name), errors.ECODE_INVAL)
178
        else:
179
          if callable(aval):
180
            dval = aval()
181
          else:
182
            dval = aval
183
          setattr(self.op, attr_name, dval)
184
      attr_val = getattr(op, attr_name)
185
      if test == ht.NoType:
186
        # no tests here
187
        continue
188
      if not callable(test):
189
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
190
                                     " given type is not a proper type (%s)" %
191
                                     (op_id, attr_name, test))
192
      if not test(attr_val):
193
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
194
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
195
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
196
                                   (op_id, attr_name), errors.ECODE_INVAL)
197

    
198
    self.CheckArguments()
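
  # Illustrative note (not part of the original file): with a declaration such
  # as "_OP_PARAMS = [("force", False, ht.TBool)]", an opcode arriving without
  # a "force" attribute gets False filled in by the loop above, while a
  # non-boolean value makes it raise OpPrereqError before Exec ever runs.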
199

    
200
  def __GetSSH(self):
201
    """Returns the SshRunner object
202

203
    """
204
    if not self.__ssh:
205
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
206
    return self.__ssh
207

    
208
  ssh = property(fget=__GetSSH)
209

    
210
  def CheckArguments(self):
211
    """Check syntactic validity for the opcode arguments.
212

213
    This method is for doing a simple syntactic check and ensuring the
214
    validity of opcode parameters, without any cluster-related
215
    checks. While the same can be accomplished in ExpandNames and/or
216
    CheckPrereq, doing these separately is better because:
217

218
      - ExpandNames is left as purely a lock-related function
219
      - CheckPrereq is run after we have acquired locks (and possibly
220
        waited for them)
221

222
    The function is allowed to change the self.op attribute so that
223
    later methods no longer need to worry about missing parameters.
224

225
    """
226
    pass
227

    
228
  def ExpandNames(self):
229
    """Expand names for this LU.
230

231
    This method is called before starting to execute the opcode, and it should
232
    update all the parameters of the opcode to their canonical form (e.g. a
233
    short node name must be fully expanded after this method has successfully
234
    completed). This way locking, hooks, logging, etc. can work correctly.
235

236
    LUs which implement this method must also populate the self.needed_locks
237
    member, as a dict with lock levels as keys, and a list of needed lock names
238
    as values. Rules:
239

240
      - use an empty dict if you don't need any lock
241
      - if you don't need any lock at a particular level omit that level
242
      - don't put anything for the BGL level
243
      - if you want all locks at a level use locking.ALL_SET as a value
244

245
    If you need to share locks (rather than acquire them exclusively) at one
246
    level you can modify self.share_locks, setting a true value (usually 1) for
247
    that level. By default locks are not shared.
248

249
    This function can also define a list of tasklets, which then will be
250
    executed in order instead of the usual LU-level CheckPrereq and Exec
251
    functions, if those are not defined by the LU.
252

253
    Examples::
254

255
      # Acquire all nodes and one instance
256
      self.needed_locks = {
257
        locking.LEVEL_NODE: locking.ALL_SET,
258
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
259
      }
260
      # Acquire just two nodes
261
      self.needed_locks = {
262
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
263
      }
264
      # Acquire no locks
265
      self.needed_locks = {} # No, you can't leave it to the default value None
266

267
    """
268
    # The implementation of this method is mandatory only if the new LU is
269
    # concurrent, so that old LUs don't need to be changed all at the same
270
    # time.
271
    if self.REQ_BGL:
272
      self.needed_locks = {} # Exclusive LUs don't need locks.
273
    else:
274
      raise NotImplementedError
275

    
276
  def DeclareLocks(self, level):
277
    """Declare LU locking needs for a level
278

279
    While most LUs can just declare their locking needs at ExpandNames time,
280
    sometimes there's the need to calculate some locks after having acquired
281
    the ones before. This function is called just before acquiring locks at a
282
    particular level, but after acquiring the ones at lower levels, and permits
283
    such calculations. It can be used to modify self.needed_locks, and by
284
    default it does nothing.
285

286
    This function is only called if you have something already set in
287
    self.needed_locks for the level.
288

289
    @param level: Locking level which is going to be locked
290
    @type level: member of ganeti.locking.LEVELS
291

292
    """
293

    
294
  def CheckPrereq(self):
295
    """Check prerequisites for this LU.
296

297
    This method should check that the prerequisites for the execution
298
    of this LU are fulfilled. It can do internode communication, but
299
    it should be idempotent - no cluster or system changes are
300
    allowed.
301

302
    The method should raise errors.OpPrereqError in case something is
303
    not fulfilled. Its return value is ignored.
304

305
    This method should also update all the parameters of the opcode to
306
    their canonical form if it hasn't been done by ExpandNames before.
307

308
    """
309
    if self.tasklets is not None:
310
      for (idx, tl) in enumerate(self.tasklets):
311
        logging.debug("Checking prerequisites for tasklet %s/%s",
312
                      idx + 1, len(self.tasklets))
313
        tl.CheckPrereq()
314
    else:
315
      pass
316

    
317
  def Exec(self, feedback_fn):
318
    """Execute the LU.
319

320
    This method should implement the actual work. It should raise
321
    errors.OpExecError for failures that are somewhat dealt with in
322
    code, or expected.
323

324
    """
325
    if self.tasklets is not None:
326
      for (idx, tl) in enumerate(self.tasklets):
327
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
328
        tl.Exec(feedback_fn)
329
    else:
330
      raise NotImplementedError
331

    
332
  def BuildHooksEnv(self):
333
    """Build hooks environment for this LU.
334

335
    This method should return a three-element tuple consisting of: a dict
336
    containing the environment that will be used for running the
337
    specific hook for this LU, a list of node names on which the hook
338
    should run before the execution, and a list of node names on which
339
    the hook should run after the execution.
340

341
    The keys of the dict must not be prefixed with 'GANETI_', as this will
342
    be handled by the hooks runner. Also note that additional keys will be
343
    added by the hooks runner. If the LU doesn't define any
344
    environment, an empty dict (and not None) should be returned.
345

346
    If no nodes are to be returned, an empty list (and not None) should be used.
347

348
    Note that if the HPATH for a LU class is None, this function will
349
    not be called.
350

351
    """
352
    raise NotImplementedError
353

    
354
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
355
    """Notify the LU about the results of its hooks.
356

357
    This method is called every time a hooks phase is executed, and notifies
358
    the Logical Unit about the hooks' result. The LU can then use it to alter
359
    its result based on the hooks.  By default the method does nothing and the
360
    previous result is passed back unchanged but any LU can define it if it
361
    wants to use the local cluster hook-scripts somehow.
362

363
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
364
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
365
    @param hook_results: the results of the multi-node hooks rpc call
366
    @param feedback_fn: function used to send feedback back to the caller
367
    @param lu_result: the previous Exec result this LU had, or None
368
        in the PRE phase
369
    @return: the new Exec result, based on the previous result
370
        and hook results
371

372
    """
373
    # API must be kept, thus we ignore the "unused argument" and "could
374
    # be a function" warnings
375
    # pylint: disable-msg=W0613,R0201
376
    return lu_result
377

    
378
  def _ExpandAndLockInstance(self):
379
    """Helper function to expand and lock an instance.
380

381
    Many LUs that work on an instance take its name in self.op.instance_name
382
    and need to expand it and then declare the expanded name for locking. This
383
    function does it, and then updates self.op.instance_name to the expanded
384
    name. It also initializes needed_locks as a dict, if this hasn't been done
385
    before.
386

387
    """
388
    if self.needed_locks is None:
389
      self.needed_locks = {}
390
    else:
391
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
392
        "_ExpandAndLockInstance called with instance-level locks set"
393
    self.op.instance_name = _ExpandInstanceName(self.cfg,
394
                                                self.op.instance_name)
395
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
396

    
397
  def _LockInstancesNodes(self, primary_only=False):
398
    """Helper function to declare instances' nodes for locking.
399

400
    This function should be called after locking one or more instances to lock
401
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
402
    with all primary or secondary nodes for instances already locked and
403
    present in self.needed_locks[locking.LEVEL_INSTANCE].
404

405
    It should be called from DeclareLocks, and for safety only works if
406
    self.recalculate_locks[locking.LEVEL_NODE] is set.
407

408
    In the future it may grow parameters to just lock some instances' nodes, or
409
    to just lock primary or secondary nodes, if needed.
410

411
    It should be called in DeclareLocks in a way similar to::
412

413
      if level == locking.LEVEL_NODE:
414
        self._LockInstancesNodes()
415

416
    @type primary_only: boolean
417
    @param primary_only: only lock primary nodes of locked instances
418

419
    """
420
    assert locking.LEVEL_NODE in self.recalculate_locks, \
421
      "_LockInstancesNodes helper function called with no nodes to recalculate"
422

    
423
    # TODO: check if we've really been called with the instance locks held
424

    
425
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
426
    # future we might want to have different behaviors depending on the value
427
    # of self.recalculate_locks[locking.LEVEL_NODE]
428
    wanted_nodes = []
429
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
430
      instance = self.context.cfg.GetInstanceInfo(instance_name)
431
      wanted_nodes.append(instance.primary_node)
432
      if not primary_only:
433
        wanted_nodes.extend(instance.secondary_nodes)
434

    
435
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
436
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
437
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
438
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
439

    
440
    del self.recalculate_locks[locking.LEVEL_NODE]
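

# A minimal, purely illustrative sketch (not part of the original file) of a
# concurrent LU following the rules documented in LogicalUnit above; the
# class name, hook path and behaviour are hypothetical and exist only to show
# the ExpandNames/CheckPrereq/BuildHooksEnv/Exec structure.
class _ExampleLUNodeOffline(LogicalUnit):
  """Reports whether a single node is marked offline (illustration only).

  """
  HPATH = "node-offline-example"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [_PNodeName]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # a shared lock suffices, since nothing is modified
    self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    self.node = self.cfg.GetNodeInfo(self.op.node_name)

  def BuildHooksEnv(self):
    env = {"OP_TARGET": self.op.node_name}
    return env, [], [self.cfg.GetMasterNode()]

  def Exec(self, feedback_fn):
    feedback_fn("Node %s offline flag: %s" % (self.node.name,
                                              self.node.offline))
    return self.node.offline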
441

    
442

    
443
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
444
  """Simple LU which runs no hooks.
445

446
  This LU is intended as a parent for other LogicalUnits which will
447
  run no hooks, in order to reduce duplicate code.
448

449
  """
450
  HPATH = None
451
  HTYPE = None
452

    
453
  def BuildHooksEnv(self):
454
    """Empty BuildHooksEnv for NoHooksLu.
455

456
    This just raises an error.
457

458
    """
459
    assert False, "BuildHooksEnv called for NoHooksLUs"
460

    
461

    
462
class Tasklet:
463
  """Tasklet base class.
464

465
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
466
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
467
  tasklets know nothing about locks.
468

469
  Subclasses must follow these rules:
470
    - Implement CheckPrereq
471
    - Implement Exec
472

473
  """
474
  def __init__(self, lu):
475
    self.lu = lu
476

    
477
    # Shortcuts
478
    self.cfg = lu.cfg
479
    self.rpc = lu.rpc
480

    
481
  def CheckPrereq(self):
482
    """Check prerequisites for this tasklets.
483

484
    This method should check whether the prerequisites for the execution of
485
    this tasklet are fulfilled. It can do internode communication, but it
486
    should be idempotent - no cluster or system changes are allowed.
487

488
    The method should raise errors.OpPrereqError in case something is not
489
    fulfilled. Its return value is ignored.
490

491
    This method should also update all parameters to their canonical form if it
492
    hasn't been done before.
493

494
    """
495
    pass
496

    
497
  def Exec(self, feedback_fn):
498
    """Execute the tasklet.
499

500
    This method should implement the actual work. It should raise
501
    errors.OpExecError for failures that are somewhat dealt with in code, or
502
    expected.
503

504
    """
505
    raise NotImplementedError
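

# A minimal, purely illustrative tasklet (not part of the original file); an
# LU would wire it up in ExpandNames via "self.tasklets = [...]" so that the
# generic LogicalUnit.CheckPrereq and Exec above run it automatically. The
# name and behaviour are hypothetical.
class _ExampleTasklet(Tasklet):
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    if self.cfg.GetNodeInfo(self.node_name) is None:
      raise errors.OpPrereqError("Node %s is not known" % self.node_name,
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    feedback_fn("Example tasklet ran for node %s" % self.node_name)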
506

    
507

    
508
class _QueryBase:
509
  """Base for query utility classes.
510

511
  """
512
  #: Attribute holding field definitions
513
  FIELDS = None
514

    
515
  def __init__(self, names, fields, use_locking):
516
    """Initializes this class.
517

518
    """
519
    self.names = names
520
    self.use_locking = use_locking
521

    
522
    self.query = query.Query(self.FIELDS, fields)
523
    self.requested_data = self.query.RequestedData()
524

    
525
    self.do_locking = None
526
    self.wanted = None
527

    
528
  def _GetNames(self, lu, all_names, lock_level):
529
    """Helper function to determine names asked for in the query.
530

531
    """
532
    if self.do_locking:
533
      names = lu.acquired_locks[lock_level]
534
    else:
535
      names = all_names
536

    
537
    if self.wanted == locking.ALL_SET:
538
      assert not self.names
539
      # caller didn't specify names, so ordering is not important
540
      return utils.NiceSort(names)
541

    
542
    # caller specified names and we must keep the same order
543
    assert self.names
544
    assert not self.do_locking or lu.acquired_locks[lock_level]
545

    
546
    missing = set(self.wanted).difference(names)
547
    if missing:
548
      raise errors.OpExecError("Some items were removed before retrieving"
549
                               " their data: %s" % missing)
550

    
551
    # Return expanded names
552
    return self.wanted
553

    
554
  @classmethod
555
  def FieldsQuery(cls, fields):
556
    """Returns list of available fields.
557

558
    @return: List of L{objects.QueryFieldDefinition}
559

560
    """
561
    return query.QueryFields(cls.FIELDS, fields)
562

    
563
  def ExpandNames(self, lu):
564
    """Expand names for this query.
565

566
    See L{LogicalUnit.ExpandNames}.
567

568
    """
569
    raise NotImplementedError()
570

    
571
  def DeclareLocks(self, lu, level):
572
    """Declare locks for this query.
573

574
    See L{LogicalUnit.DeclareLocks}.
575

576
    """
577
    raise NotImplementedError()
578

    
579
  def _GetQueryData(self, lu):
580
    """Collects all data for this query.
581

582
    @return: Query data object
583

584
    """
585
    raise NotImplementedError()
586

    
587
  def NewStyleQuery(self, lu):
588
    """Collect data and execute query.
589

590
    """
591
    data = self._GetQueryData(lu)
592

    
593
    return objects.QueryResponse(data=self.query.Query(data),
594
                                 fields=self.query.GetFields()).ToDict()
595

    
596
  def OldStyleQuery(self, lu):
597
    """Collect data and execute query.
598

599
    """
600
    return self.query.OldStyleQuery(self._GetQueryData(lu))
601

    
602

    
603
def _GetWantedNodes(lu, nodes):
604
  """Returns list of checked and expanded node names.
605

606
  @type lu: L{LogicalUnit}
607
  @param lu: the logical unit on whose behalf we execute
608
  @type nodes: list
609
  @param nodes: list of node names or None for all nodes
610
  @rtype: list
611
  @return: the list of nodes, sorted
612
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
613

614
  """
615
  if nodes:
616
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
617

    
618
  return utils.NiceSort(lu.cfg.GetNodeList())
619

    
620

    
621
def _GetWantedInstances(lu, instances):
622
  """Returns list of checked and expanded instance names.
623

624
  @type lu: L{LogicalUnit}
625
  @param lu: the logical unit on whose behalf we execute
626
  @type instances: list
627
  @param instances: list of instance names or None for all instances
628
  @rtype: list
629
  @return: the list of instances, sorted
630
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
631
  @raise errors.OpPrereqError: if any of the passed instances is not found
632

633
  """
634
  if instances:
635
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
636
  else:
637
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
638
  return wanted
639

    
640

    
641
def _GetUpdatedParams(old_params, update_dict,
642
                      use_default=True, use_none=False):
643
  """Return the new version of a parameter dictionary.
644

645
  @type old_params: dict
646
  @param old_params: old parameters
647
  @type update_dict: dict
648
  @param update_dict: dict containing new parameter values, or
649
      constants.VALUE_DEFAULT to reset the parameter to its default
650
      value
651
  @type use_default: boolean
652
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
653
      values as 'to be deleted' values
654
  @type use_none: boolean
655
  @param use_none: whether to recognise C{None} values as 'to be
656
      deleted' values
657
  @rtype: dict
658
  @return: the new parameter dictionary
659

660
  """
661
  params_copy = copy.deepcopy(old_params)
662
  for key, val in update_dict.iteritems():
663
    if ((use_default and val == constants.VALUE_DEFAULT) or
664
        (use_none and val is None)):
665
      try:
666
        del params_copy[key]
667
      except KeyError:
668
        pass
669
    else:
670
      params_copy[key] = val
671
  return params_copy
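
# Purely illustrative example (not part of the original file) of the merge
# semantics above, using hypothetical parameter values:
#
#   _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"},
#                     {"kernel_path": constants.VALUE_DEFAULT,
#                      "serial_console": True})
#   # -> {"root_path": "/dev/vda1", "serial_console": True}
#
# i.e. a VALUE_DEFAULT entry removes the per-instance override so the
# cluster-level default applies again, while other keys are simply updated.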
672

    
673

    
674
def _CheckOutputFields(static, dynamic, selected):
675
  """Checks whether all selected fields are valid.
676

677
  @type static: L{utils.FieldSet}
678
  @param static: static fields set
679
  @type dynamic: L{utils.FieldSet}
680
  @param dynamic: dynamic fields set
681

682
  """
683
  f = utils.FieldSet()
684
  f.Extend(static)
685
  f.Extend(dynamic)
686

    
687
  delta = f.NonMatching(selected)
688
  if delta:
689
    raise errors.OpPrereqError("Unknown output fields selected: %s"
690
                               % ",".join(delta), errors.ECODE_INVAL)
691

    
692

    
693
def _CheckGlobalHvParams(params):
694
  """Validates that given hypervisor params are not global ones.
695

696
  This will ensure that instances don't get customised versions of
697
  global params.
698

699
  """
700
  used_globals = constants.HVC_GLOBALS.intersection(params)
701
  if used_globals:
702
    msg = ("The following hypervisor parameters are global and cannot"
703
           " be customized at instance level, please modify them at"
704
           " cluster level: %s" % utils.CommaJoin(used_globals))
705
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
706

    
707

    
708
def _CheckNodeOnline(lu, node, msg=None):
709
  """Ensure that a given node is online.
710

711
  @param lu: the LU on behalf of which we make the check
712
  @param node: the node to check
713
  @param msg: if passed, should be a message to replace the default one
714
  @raise errors.OpPrereqError: if the node is offline
715

716
  """
717
  if msg is None:
718
    msg = "Can't use offline node"
719
  if lu.cfg.GetNodeInfo(node).offline:
720
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
721

    
722

    
723
def _CheckNodeNotDrained(lu, node):
724
  """Ensure that a given node is not drained.
725

726
  @param lu: the LU on behalf of which we make the check
727
  @param node: the node to check
728
  @raise errors.OpPrereqError: if the node is drained
729

730
  """
731
  if lu.cfg.GetNodeInfo(node).drained:
732
    raise errors.OpPrereqError("Can't use drained node %s" % node,
733
                               errors.ECODE_STATE)
734

    
735

    
736
def _CheckNodeVmCapable(lu, node):
737
  """Ensure that a given node is vm capable.
738

739
  @param lu: the LU on behalf of which we make the check
740
  @param node: the node to check
741
  @raise errors.OpPrereqError: if the node is not vm capable
742

743
  """
744
  if not lu.cfg.GetNodeInfo(node).vm_capable:
745
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
746
                               errors.ECODE_STATE)
747

    
748

    
749
def _CheckNodeHasOS(lu, node, os_name, force_variant):
750
  """Ensure that a node supports a given OS.
751

752
  @param lu: the LU on behalf of which we make the check
753
  @param node: the node to check
754
  @param os_name: the OS to query about
755
  @param force_variant: whether to ignore variant errors
756
  @raise errors.OpPrereqError: if the node does not support the OS
757

758
  """
759
  result = lu.rpc.call_os_get(node, os_name)
760
  result.Raise("OS '%s' not in supported OS list for node %s" %
761
               (os_name, node),
762
               prereq=True, ecode=errors.ECODE_INVAL)
763
  if not force_variant:
764
    _CheckOSVariant(result.payload, os_name)
765

    
766

    
767
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
768
  """Ensure that a node has the given secondary ip.
769

770
  @type lu: L{LogicalUnit}
771
  @param lu: the LU on behalf of which we make the check
772
  @type node: string
773
  @param node: the node to check
774
  @type secondary_ip: string
775
  @param secondary_ip: the ip to check
776
  @type prereq: boolean
777
  @param prereq: whether to throw a prerequisite or an execute error
778
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
779
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
780

781
  """
782
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
783
  result.Raise("Failure checking secondary ip on node %s" % node,
784
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
785
  if not result.payload:
786
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
787
           " please fix and re-run this command" % secondary_ip)
788
    if prereq:
789
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
790
    else:
791
      raise errors.OpExecError(msg)
792

    
793

    
794
def _RequireFileStorage():
795
  """Checks that file storage is enabled.
796

797
  @raise errors.OpPrereqError: when file storage is disabled
798

799
  """
800
  if not constants.ENABLE_FILE_STORAGE:
801
    raise errors.OpPrereqError("File storage disabled at configure time",
802
                               errors.ECODE_INVAL)
803

    
804

    
805
def _CheckDiskTemplate(template):
806
  """Ensure a given disk template is valid.
807

808
  """
809
  if template not in constants.DISK_TEMPLATES:
810
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
811
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
812
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
813
  if template == constants.DT_FILE:
814
    _RequireFileStorage()
815
  return True
816

    
817

    
818
def _CheckStorageType(storage_type):
819
  """Ensure a given storage type is valid.
820

821
  """
822
  if storage_type not in constants.VALID_STORAGE_TYPES:
823
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
824
                               errors.ECODE_INVAL)
825
  if storage_type == constants.ST_FILE:
826
    _RequireFileStorage()
827
  return True
828

    
829

    
830
def _GetClusterDomainSecret():
831
  """Reads the cluster domain secret.
832

833
  """
834
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
835
                               strict=True)
836

    
837

    
838
def _CheckInstanceDown(lu, instance, reason):
839
  """Ensure that an instance is not running."""
840
  if instance.admin_up:
841
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
842
                               (instance.name, reason), errors.ECODE_STATE)
843

    
844
  pnode = instance.primary_node
845
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
846
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
847
              prereq=True, ecode=errors.ECODE_ENVIRON)
848

    
849
  if instance.name in ins_l.payload:
850
    raise errors.OpPrereqError("Instance %s is running, %s" %
851
                               (instance.name, reason), errors.ECODE_STATE)
852

    
853

    
854
def _ExpandItemName(fn, name, kind):
855
  """Expand an item name.
856

857
  @param fn: the function to use for expansion
858
  @param name: requested item name
859
  @param kind: text description ('Node' or 'Instance')
860
  @return: the resolved (full) name
861
  @raise errors.OpPrereqError: if the item is not found
862

863
  """
864
  full_name = fn(name)
865
  if full_name is None:
866
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
867
                               errors.ECODE_NOENT)
868
  return full_name
869

    
870

    
871
def _ExpandNodeName(cfg, name):
872
  """Wrapper over L{_ExpandItemName} for nodes."""
873
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
874

    
875

    
876
def _ExpandInstanceName(cfg, name):
877
  """Wrapper over L{_ExpandItemName} for instance."""
878
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
879

    
880

    
881
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
882
                          memory, vcpus, nics, disk_template, disks,
883
                          bep, hvp, hypervisor_name):
884
  """Builds instance related env variables for hooks
885

886
  This builds the hook environment from individual variables.
887

888
  @type name: string
889
  @param name: the name of the instance
890
  @type primary_node: string
891
  @param primary_node: the name of the instance's primary node
892
  @type secondary_nodes: list
893
  @param secondary_nodes: list of secondary nodes as strings
894
  @type os_type: string
895
  @param os_type: the name of the instance's OS
896
  @type status: boolean
897
  @param status: the should_run status of the instance
898
  @type memory: string
899
  @param memory: the memory size of the instance
900
  @type vcpus: string
901
  @param vcpus: the count of VCPUs the instance has
902
  @type nics: list
903
  @param nics: list of tuples (ip, mac, mode, link) representing
904
      the NICs the instance has
905
  @type disk_template: string
906
  @param disk_template: the disk template of the instance
907
  @type disks: list
908
  @param disks: the list of (size, mode) pairs
909
  @type bep: dict
910
  @param bep: the backend parameters for the instance
911
  @type hvp: dict
912
  @param hvp: the hypervisor parameters for the instance
913
  @type hypervisor_name: string
914
  @param hypervisor_name: the hypervisor for the instance
915
  @rtype: dict
916
  @return: the hook environment for this instance
917

918
  """
919
  if status:
920
    str_status = "up"
921
  else:
922
    str_status = "down"
923
  env = {
924
    "OP_TARGET": name,
925
    "INSTANCE_NAME": name,
926
    "INSTANCE_PRIMARY": primary_node,
927
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
928
    "INSTANCE_OS_TYPE": os_type,
929
    "INSTANCE_STATUS": str_status,
930
    "INSTANCE_MEMORY": memory,
931
    "INSTANCE_VCPUS": vcpus,
932
    "INSTANCE_DISK_TEMPLATE": disk_template,
933
    "INSTANCE_HYPERVISOR": hypervisor_name,
934
  }
935

    
936
  if nics:
937
    nic_count = len(nics)
938
    for idx, (ip, mac, mode, link) in enumerate(nics):
939
      if ip is None:
940
        ip = ""
941
      env["INSTANCE_NIC%d_IP" % idx] = ip
942
      env["INSTANCE_NIC%d_MAC" % idx] = mac
943
      env["INSTANCE_NIC%d_MODE" % idx] = mode
944
      env["INSTANCE_NIC%d_LINK" % idx] = link
945
      if mode == constants.NIC_MODE_BRIDGED:
946
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
947
  else:
948
    nic_count = 0
949

    
950
  env["INSTANCE_NIC_COUNT"] = nic_count
951

    
952
  if disks:
953
    disk_count = len(disks)
954
    for idx, (size, mode) in enumerate(disks):
955
      env["INSTANCE_DISK%d_SIZE" % idx] = size
956
      env["INSTANCE_DISK%d_MODE" % idx] = mode
957
  else:
958
    disk_count = 0
959

    
960
  env["INSTANCE_DISK_COUNT"] = disk_count
961

    
962
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
963
    for key, value in source.items():
964
      env["INSTANCE_%s_%s" % (kind, key)] = value
965

    
966
  return env
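
# Purely illustrative example (not part of the original file), with made-up
# instance data, of the environment produced above:
#
#   env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
#                               "debian-image", True, 512, 1,
#                               [("198.51.100.10", "aa:00:00:11:22:33",
#                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
#                               constants.DT_PLAIN, [(10240, "rw")],
#                               {}, {}, "xen-pvm")
#   # env["INSTANCE_STATUS"] == "up", env["INSTANCE_NIC_COUNT"] == 1,
#   # env["INSTANCE_NIC0_BRIDGE"] == "xen-br0", env["INSTANCE_DISK0_SIZE"] == 10240
#
# The hooks runner later prefixes every key with "GANETI_".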
967

    
968

    
969
def _NICListToTuple(lu, nics):
970
  """Build a list of nic information tuples.
971

972
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
973
  value in LUQueryInstanceData.
974

975
  @type lu:  L{LogicalUnit}
976
  @param lu: the logical unit on whose behalf we execute
977
  @type nics: list of L{objects.NIC}
978
  @param nics: list of nics to convert to hooks tuples
979

980
  """
981
  hooks_nics = []
982
  cluster = lu.cfg.GetClusterInfo()
983
  for nic in nics:
984
    ip = nic.ip
985
    mac = nic.mac
986
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
987
    mode = filled_params[constants.NIC_MODE]
988
    link = filled_params[constants.NIC_LINK]
989
    hooks_nics.append((ip, mac, mode, link))
990
  return hooks_nics
991

    
992

    
993
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
994
  """Builds instance related env variables for hooks from an object.
995

996
  @type lu: L{LogicalUnit}
997
  @param lu: the logical unit on whose behalf we execute
998
  @type instance: L{objects.Instance}
999
  @param instance: the instance for which we should build the
1000
      environment
1001
  @type override: dict
1002
  @param override: dictionary with key/values that will override
1003
      our values
1004
  @rtype: dict
1005
  @return: the hook environment dictionary
1006

1007
  """
1008
  cluster = lu.cfg.GetClusterInfo()
1009
  bep = cluster.FillBE(instance)
1010
  hvp = cluster.FillHV(instance)
1011
  args = {
1012
    'name': instance.name,
1013
    'primary_node': instance.primary_node,
1014
    'secondary_nodes': instance.secondary_nodes,
1015
    'os_type': instance.os,
1016
    'status': instance.admin_up,
1017
    'memory': bep[constants.BE_MEMORY],
1018
    'vcpus': bep[constants.BE_VCPUS],
1019
    'nics': _NICListToTuple(lu, instance.nics),
1020
    'disk_template': instance.disk_template,
1021
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1022
    'bep': bep,
1023
    'hvp': hvp,
1024
    'hypervisor_name': instance.hypervisor,
1025
  }
1026
  if override:
1027
    args.update(override)
1028
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1029

    
1030

    
1031
def _AdjustCandidatePool(lu, exceptions):
1032
  """Adjust the candidate pool after node operations.
1033

1034
  """
1035
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1036
  if mod_list:
1037
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1038
               utils.CommaJoin(node.name for node in mod_list))
1039
    for name in mod_list:
1040
      lu.context.ReaddNode(name)
1041
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042
  if mc_now > mc_max:
1043
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1044
               (mc_now, mc_max))
1045

    
1046

    
1047
def _DecideSelfPromotion(lu, exceptions=None):
1048
  """Decide whether I should promote myself as a master candidate.
1049

1050
  """
1051
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1052
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1053
  # the new node will increase mc_max by one, so:
1054
  mc_should = min(mc_should + 1, cp_size)
1055
  return mc_now < mc_should
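
# Worked example (illustrative): with candidate_pool_size = 10, mc_now = 7
# and mc_should = 8, the node being added raises the target to
# min(8 + 1, 10) = 9; since 7 < 9, the function returns True and the node
# promotes itself.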
1056

    
1057

    
1058
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1059
  """Check that the brigdes needed by a list of nics exist.
1060

1061
  """
1062
  cluster = lu.cfg.GetClusterInfo()
1063
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1064
  brlist = [params[constants.NIC_LINK] for params in paramslist
1065
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1066
  if brlist:
1067
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1068
    result.Raise("Error checking bridges on destination node '%s'" %
1069
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1070

    
1071

    
1072
def _CheckInstanceBridgesExist(lu, instance, node=None):
1073
  """Check that the brigdes needed by an instance exist.
1074

1075
  """
1076
  if node is None:
1077
    node = instance.primary_node
1078
  _CheckNicsBridgesExist(lu, instance.nics, node)
1079

    
1080

    
1081
def _CheckOSVariant(os_obj, name):
1082
  """Check whether an OS name conforms to the os variants specification.
1083

1084
  @type os_obj: L{objects.OS}
1085
  @param os_obj: OS object to check
1086
  @type name: string
1087
  @param name: OS name passed by the user, to check for validity
1088

1089
  """
1090
  if not os_obj.supported_variants:
1091
    return
1092
  variant = objects.OS.GetVariant(name)
1093
  if not variant:
1094
    raise errors.OpPrereqError("OS name must include a variant",
1095
                               errors.ECODE_INVAL)
1096

    
1097
  if variant not in os_obj.supported_variants:
1098
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1099

    
1100

    
1101
def _GetNodeInstancesInner(cfg, fn):
1102
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1103

    
1104

    
1105
def _GetNodeInstances(cfg, node_name):
1106
  """Returns a list of all primary and secondary instances on a node.
1107

1108
  """
1109

    
1110
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1111

    
1112

    
1113
def _GetNodePrimaryInstances(cfg, node_name):
1114
  """Returns primary instances on a node.
1115

1116
  """
1117
  return _GetNodeInstancesInner(cfg,
1118
                                lambda inst: node_name == inst.primary_node)
1119

    
1120

    
1121
def _GetNodeSecondaryInstances(cfg, node_name):
1122
  """Returns secondary instances on a node.
1123

1124
  """
1125
  return _GetNodeInstancesInner(cfg,
1126
                                lambda inst: node_name in inst.secondary_nodes)
1127

    
1128

    
1129
def _GetStorageTypeArgs(cfg, storage_type):
1130
  """Returns the arguments for a storage type.
1131

1132
  """
1133
  # Special case for file storage
1134
  if storage_type == constants.ST_FILE:
1135
    # storage.FileStorage wants a list of storage directories
1136
    return [[cfg.GetFileStorageDir()]]
1137

    
1138
  return []
1139

    
1140

    
1141
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1142
  faulty = []
1143

    
1144
  for dev in instance.disks:
1145
    cfg.SetDiskID(dev, node_name)
1146

    
1147
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1148
  result.Raise("Failed to get disk status from node %s" % node_name,
1149
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1150

    
1151
  for idx, bdev_status in enumerate(result.payload):
1152
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1153
      faulty.append(idx)
1154

    
1155
  return faulty
1156

    
1157

    
1158
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1159
  """Check the sanity of iallocator and node arguments and use the
1160
  cluster-wide iallocator if appropriate.
1161

1162
  Check that at most one of (iallocator, node) is specified. If none is
1163
  specified, then the LU's opcode's iallocator slot is filled with the
1164
  cluster-wide default iallocator.
1165

1166
  @type iallocator_slot: string
1167
  @param iallocator_slot: the name of the opcode iallocator slot
1168
  @type node_slot: string
1169
  @param node_slot: the name of the opcode target node slot
1170

1171
  """
1172
  node = getattr(lu.op, node_slot, None)
1173
  iallocator = getattr(lu.op, iallocator_slot, None)
1174

    
1175
  if node is not None and iallocator is not None:
1176
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1177
                               errors.ECODE_INVAL)
1178
  elif node is None and iallocator is None:
1179
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1180
    if default_iallocator:
1181
      setattr(lu.op, iallocator_slot, default_iallocator)
1182
    else:
1183
      raise errors.OpPrereqError("No iallocator or node given and no"
1184
                                 " cluster-wide default iallocator found."
1185
                                 " Please specify either an iallocator or a"
1186
                                 " node, or set a cluster-wide default"
1187
                                 " iallocator.")
1188

    
1189

    
1190
class LUPostInitCluster(LogicalUnit):
1191
  """Logical unit for running hooks after cluster initialization.
1192

1193
  """
1194
  HPATH = "cluster-init"
1195
  HTYPE = constants.HTYPE_CLUSTER
1196

    
1197
  def BuildHooksEnv(self):
1198
    """Build hooks env.
1199

1200
    """
1201
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1202
    mn = self.cfg.GetMasterNode()
1203
    return env, [], [mn]
1204

    
1205
  def Exec(self, feedback_fn):
1206
    """Nothing to do.
1207

1208
    """
1209
    return True
1210

    
1211

    
1212
class LUDestroyCluster(LogicalUnit):
1213
  """Logical unit for destroying the cluster.
1214

1215
  """
1216
  HPATH = "cluster-destroy"
1217
  HTYPE = constants.HTYPE_CLUSTER
1218

    
1219
  def BuildHooksEnv(self):
1220
    """Build hooks env.
1221

1222
    """
1223
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1224
    return env, [], []
1225

    
1226
  def CheckPrereq(self):
1227
    """Check prerequisites.
1228

1229
    This checks whether the cluster is empty.
1230

1231
    Any errors are signaled by raising errors.OpPrereqError.
1232

1233
    """
1234
    master = self.cfg.GetMasterNode()
1235

    
1236
    nodelist = self.cfg.GetNodeList()
1237
    if len(nodelist) != 1 or nodelist[0] != master:
1238
      raise errors.OpPrereqError("There are still %d node(s) in"
1239
                                 " this cluster." % (len(nodelist) - 1),
1240
                                 errors.ECODE_INVAL)
1241
    instancelist = self.cfg.GetInstanceList()
1242
    if instancelist:
1243
      raise errors.OpPrereqError("There are still %d instance(s) in"
1244
                                 " this cluster." % len(instancelist),
1245
                                 errors.ECODE_INVAL)
1246

    
1247
  def Exec(self, feedback_fn):
1248
    """Destroys the cluster.
1249

1250
    """
1251
    master = self.cfg.GetMasterNode()
1252

    
1253
    # Run post hooks on master node before it's removed
1254
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1255
    try:
1256
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1257
    except:
1258
      # pylint: disable-msg=W0702
1259
      self.LogWarning("Errors occurred running hooks on %s" % master)
1260

    
1261
    result = self.rpc.call_node_stop_master(master, False)
1262
    result.Raise("Could not disable the master role")
1263

    
1264
    return master
1265

    
1266

    
1267
def _VerifyCertificate(filename):
1268
  """Verifies a certificate for LUVerifyCluster.
1269

1270
  @type filename: string
1271
  @param filename: Path to PEM file
1272

1273
  """
1274
  try:
1275
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1276
                                           utils.ReadFile(filename))
1277
  except Exception, err: # pylint: disable-msg=W0703
1278
    return (LUVerifyCluster.ETYPE_ERROR,
1279
            "Failed to load X509 certificate %s: %s" % (filename, err))
1280

    
1281
  (errcode, msg) = \
1282
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1283
                                constants.SSL_CERT_EXPIRATION_ERROR)
1284

    
1285
  if msg:
1286
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1287
  else:
1288
    fnamemsg = None
1289

    
1290
  if errcode is None:
1291
    return (None, fnamemsg)
1292
  elif errcode == utils.CERT_WARNING:
1293
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1294
  elif errcode == utils.CERT_ERROR:
1295
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1296

    
1297
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1298

    
1299

    
1300
class LUVerifyCluster(LogicalUnit):
1301
  """Verifies the cluster status.
1302

1303
  """
1304
  HPATH = "cluster-verify"
1305
  HTYPE = constants.HTYPE_CLUSTER
1306
  _OP_PARAMS = [
1307
    ("skip_checks", ht.EmptyList,
1308
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1309
    ("verbose", False, ht.TBool),
1310
    ("error_codes", False, ht.TBool),
1311
    ("debug_simulate_errors", False, ht.TBool),
1312
    ]
1313
  REQ_BGL = False
1314

    
1315
  TCLUSTER = "cluster"
1316
  TNODE = "node"
1317
  TINSTANCE = "instance"
1318

    
1319
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1320
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1321
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1322
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1323
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1324
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1325
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1326
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1327
  ENODEDRBD = (TNODE, "ENODEDRBD")
1328
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1329
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1330
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1331
  ENODEHV = (TNODE, "ENODEHV")
1332
  ENODELVM = (TNODE, "ENODELVM")
1333
  ENODEN1 = (TNODE, "ENODEN1")
1334
  ENODENET = (TNODE, "ENODENET")
1335
  ENODEOS = (TNODE, "ENODEOS")
1336
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1337
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1338
  ENODERPC = (TNODE, "ENODERPC")
1339
  ENODESSH = (TNODE, "ENODESSH")
1340
  ENODEVERSION = (TNODE, "ENODEVERSION")
1341
  ENODESETUP = (TNODE, "ENODESETUP")
1342
  ENODETIME = (TNODE, "ENODETIME")
1343

    
1344
  ETYPE_FIELD = "code"
1345
  ETYPE_ERROR = "ERROR"
1346
  ETYPE_WARNING = "WARNING"
1347

    
1348
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1349

    
1350
  class NodeImage(object):
1351
    """A class representing the logical and physical status of a node.
1352

1353
    @type name: string
1354
    @ivar name: the node name to which this object refers
1355
    @ivar volumes: a structure as returned from
1356
        L{ganeti.backend.GetVolumeList} (runtime)
1357
    @ivar instances: a list of running instances (runtime)
1358
    @ivar pinst: list of configured primary instances (config)
1359
    @ivar sinst: list of configured secondary instances (config)
1360
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1361
        of this node (config)
1362
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1363
    @ivar dfree: free disk, as reported by the node (runtime)
1364
    @ivar offline: the offline status (config)
1365
    @type rpc_fail: boolean
1366
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1367
        not whether the individual keys were correct) (runtime)
1368
    @type lvm_fail: boolean
1369
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1370
    @type hyp_fail: boolean
1371
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1372
    @type ghost: boolean
1373
    @ivar ghost: whether this is a known node or not (config)
1374
    @type os_fail: boolean
1375
    @ivar os_fail: whether the RPC call didn't return valid OS data
1376
    @type oslist: list
1377
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1378
    @type vm_capable: boolean
1379
    @ivar vm_capable: whether the node can host instances
1380

1381
    """
1382
    def __init__(self, offline=False, name=None, vm_capable=True):
1383
      self.name = name
1384
      self.volumes = {}
1385
      self.instances = []
1386
      self.pinst = []
1387
      self.sinst = []
1388
      self.sbp = {}
1389
      self.mfree = 0
1390
      self.dfree = 0
1391
      self.offline = offline
1392
      self.vm_capable = vm_capable
1393
      self.rpc_fail = False
1394
      self.lvm_fail = False
1395
      self.hyp_fail = False
1396
      self.ghost = False
1397
      self.os_fail = False
1398
      self.oslist = {}
1399

    
1400
  def ExpandNames(self):
1401
    self.needed_locks = {
1402
      locking.LEVEL_NODE: locking.ALL_SET,
1403
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1404
    }
1405
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1406

    
1407
  def _Error(self, ecode, item, msg, *args, **kwargs):
1408
    """Format an error message.
1409

1410
    Based on the opcode's error_codes parameter, either format a
1411
    parseable error code, or a simpler error string.
1412

1413
    This must be called only from Exec and functions called from Exec.
1414

1415
    """
1416
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1417
    itype, etxt = ecode
1418
    # first complete the msg
1419
    if args:
1420
      msg = msg % args
1421
    # then format the whole message
1422
    if self.op.error_codes:
1423
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1424
    else:
1425
      if item:
1426
        item = " " + item
1427
      else:
1428
        item = ""
1429
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1430
    # and finally report it via the feedback_fn
1431
    self._feedback_fn("  - %s" % msg)
1432

    
1433
  def _ErrorIf(self, cond, *args, **kwargs):
1434
    """Log an error message if the passed condition is True.
1435

1436
    """
1437
    cond = bool(cond) or self.op.debug_simulate_errors
1438
    if cond:
1439
      self._Error(*args, **kwargs)
1440
    # do not mark the operation as failed for WARN cases only
1441
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1442
      self.bad = self.bad or cond
1443

    
1444
  def _VerifyNode(self, ninfo, nresult):
1445
    """Perform some basic validation on data returned from a node.
1446

1447
      - check the result data structure is well formed and has all the
1448
        mandatory fields
1449
      - check ganeti version
1450

1451
    @type ninfo: L{objects.Node}
1452
    @param ninfo: the node to check
1453
    @param nresult: the results from the node
1454
    @rtype: boolean
1455
    @return: whether overall this call was successful (and we can expect
1456
         reasonable values in the response)
1457

1458
    """
1459
    node = ninfo.name
1460
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1461

    
1462
    # main result, nresult should be a non-empty dict
1463
    test = not nresult or not isinstance(nresult, dict)
1464
    _ErrorIf(test, self.ENODERPC, node,
1465
                  "unable to verify node: no data returned")
1466
    if test:
1467
      return False
1468

    
1469
    # compares ganeti version
1470
    local_version = constants.PROTOCOL_VERSION
1471
    remote_version = nresult.get("version", None)
1472
    test = not (remote_version and
1473
                isinstance(remote_version, (list, tuple)) and
1474
                len(remote_version) == 2)
1475
    _ErrorIf(test, self.ENODERPC, node,
1476
             "connection to node returned invalid data")
1477
    if test:
1478
      return False
1479

    
1480
    test = local_version != remote_version[0]
1481
    _ErrorIf(test, self.ENODEVERSION, node,
1482
             "incompatible protocol versions: master %s,"
1483
             " node %s", local_version, remote_version[0])
1484
    if test:
1485
      return False
1486

    
1487
    # node seems compatible, we can actually try to look into its results
1488

    
1489
    # full package version
1490
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1491
                  self.ENODEVERSION, node,
1492
                  "software version mismatch: master %s, node %s",
1493
                  constants.RELEASE_VERSION, remote_version[1],
1494
                  code=self.ETYPE_WARNING)
1495

    
1496
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1497
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1498
      for hv_name, hv_result in hyp_result.iteritems():
1499
        test = hv_result is not None
1500
        _ErrorIf(test, self.ENODEHV, node,
1501
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1502

    
1503
    test = nresult.get(constants.NV_NODESETUP,
1504
                           ["Missing NODESETUP results"])
1505
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1506
             "; ".join(test))
1507

    
1508
    return True
1509

    
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
1541
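  # Illustrative sketch (not part of the original module): the check above
  # accepts any node time inside [start - skew, end + skew] and otherwise
  # reports the distance to the nearest edge of that window.  The same test
  # in isolation (the helper name is made up):
  #
  #   def _ClockSkew(ntime, start, end, max_skew):
  #     """Return the divergence in seconds, or None if within the window."""
  #     if ntime < (start - max_skew):
  #       return abs(start - ntime)
  #     if ntime > (end + max_skew):
  #       return abs(ntime - end)
  #     return None
  #
  #   assert _ClockSkew(100.0, 90.0, 95.0, 150.0) is None
  #   assert _ClockSkew(400.0, 90.0, 95.0, 150.0) == 305.0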

    
1542
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1543
    """Check the node time.
1544

1545
    @type ninfo: L{objects.Node}
1546
    @param ninfo: the node to check
1547
    @param nresult: the remote results for the node
1548
    @param vg_name: the configured VG name
1549

1550
    """
1551
    if vg_name is None:
1552
      return
1553

    
1554
    node = ninfo.name
1555
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1556

    
1557
    # checks vg existence and size > 20G
1558
    vglist = nresult.get(constants.NV_VGLIST, None)
1559
    test = not vglist
1560
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1561
    if not test:
1562
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1563
                                            constants.MIN_VG_SIZE)
1564
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1565

    
1566
    # check pv names
1567
    pvlist = nresult.get(constants.NV_PVLIST, None)
1568
    test = pvlist is None
1569
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1570
    if not test:
1571
      # check that ':' is not present in PV names, since it's a
1572
      # special character for lvcreate (denotes the range of PEs to
1573
      # use on the PV)
1574
      for _, pvname, owner_vg in pvlist:
1575
        test = ":" in pvname
1576
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1577
                 " '%s' of VG '%s'", pvname, owner_vg)
1578
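  # Illustrative sketch (not part of the original module): the PV check above
  # rejects any PV name containing ':' because lvcreate interprets a suffix
  # like "pv:100-200" as a physical-extent range.  The same filter in
  # isolation, over rows shaped like the ones unpacked above (first field
  # unused, then PV name, then owning VG):
  #
  #   def _BadPVNames(pvlist):
  #     """Return the (pv_name, vg_name) pairs whose PV name contains ':'."""
  #     return [(pvname, owner_vg)
  #             for (_, pvname, owner_vg) in pvlist
  #             if ":" in pvname]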

    
1579
  def _VerifyNodeNetwork(self, ninfo, nresult):
1580
    """Check the node time.
1581

1582
    @type ninfo: L{objects.Node}
1583
    @param ninfo: the node to check
1584
    @param nresult: the remote results for the node
1585

1586
    """
1587
    node = ninfo.name
1588
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1589

    
1590
    test = constants.NV_NODELIST not in nresult
1591
    _ErrorIf(test, self.ENODESSH, node,
1592
             "node hasn't returned node ssh connectivity data")
1593
    if not test:
1594
      if nresult[constants.NV_NODELIST]:
1595
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1596
          _ErrorIf(True, self.ENODESSH, node,
1597
                   "ssh communication with node '%s': %s", a_node, a_msg)
1598

    
1599
    test = constants.NV_NODENETTEST not in nresult
1600
    _ErrorIf(test, self.ENODENET, node,
1601
             "node hasn't returned node tcp connectivity data")
1602
    if not test:
1603
      if nresult[constants.NV_NODENETTEST]:
1604
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1605
        for anode in nlist:
1606
          _ErrorIf(True, self.ENODENET, node,
1607
                   "tcp communication with node '%s': %s",
1608
                   anode, nresult[constants.NV_NODENETTEST][anode])
1609

    
1610
    test = constants.NV_MASTERIP not in nresult
1611
    _ErrorIf(test, self.ENODENET, node,
1612
             "node hasn't returned node master IP reachability data")
1613
    if not test:
1614
      if not nresult[constants.NV_MASTERIP]:
1615
        if node == self.master_node:
1616
          msg = "the master node cannot reach the master IP (not configured?)"
1617
        else:
1618
          msg = "cannot reach the master IP"
1619
        _ErrorIf(True, self.ENODENET, node, msg)
1620

    
1621
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1622
                      diskstatus):
1623
    """Verify an instance.
1624

1625
    This function checks to see if the required block devices are
1626
    available on the instance's node.
1627

1628
    """
1629
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1630
    node_current = instanceconfig.primary_node
1631

    
1632
    node_vol_should = {}
1633
    instanceconfig.MapLVsByNode(node_vol_should)
1634

    
1635
    for node in node_vol_should:
1636
      n_img = node_image[node]
1637
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1638
        # ignore missing volumes on offline or broken nodes
1639
        continue
1640
      for volume in node_vol_should[node]:
1641
        test = volume not in n_img.volumes
1642
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1643
                 "volume %s missing on node %s", volume, node)
1644

    
1645
    if instanceconfig.admin_up:
1646
      pri_img = node_image[node_current]
1647
      test = instance not in pri_img.instances and not pri_img.offline
1648
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1649
               "instance not running on its primary node %s",
1650
               node_current)
1651

    
1652
    for node, n_img in node_image.items():
1653
      if node != node_current:
1654
        test = instance in n_img.instances
1655
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1656
                 "instance should not run on node %s", node)
1657

    
1658
    diskdata = [(nname, success, status, idx)
1659
                for (nname, disks) in diskstatus.items()
1660
                for idx, (success, status) in enumerate(disks)]
1661

    
1662
    for nname, success, bdev_status, idx in diskdata:
1663
      _ErrorIf(instanceconfig.admin_up and not success,
1664
               self.EINSTANCEFAULTYDISK, instance,
1665
               "couldn't retrieve status for disk/%s on %s: %s",
1666
               idx, nname, bdev_status)
1667
      _ErrorIf((instanceconfig.admin_up and success and
1668
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1669
               self.EINSTANCEFAULTYDISK, instance,
1670
               "disk/%s on %s is faulty", idx, nname)
1671
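  # Illustrative sketch (not part of the original module): the diskdata
  # comprehension above flattens {node: [(success, status), ...]} into one
  # (node, success, status, idx) tuple per disk.  With made-up sample data:
  #
  #   diskstatus = {
  #     "node1": [(True, "ok"), (False, "degraded")],
  #     "node2": [(True, "ok")],
  #     }
  #   diskdata = [(nname, success, status, idx)
  #               for (nname, disks) in diskstatus.items()
  #               for idx, (success, status) in enumerate(disks)]
  #   # -> ("node1", True, "ok", 0), ("node1", False, "degraded", 1) and
  #   #    ("node2", True, "ok", 0), in dict iteration order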

    
1672
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1673
    """Verify if there are any unknown volumes in the cluster.
1674

1675
    The .os, .swap and backup volumes are ignored. All other volumes are
1676
    reported as unknown.
1677

1678
    @type reserved: L{ganeti.utils.FieldSet}
1679
    @param reserved: a FieldSet of reserved volume names
1680

1681
    """
1682
    for node, n_img in node_image.items():
1683
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1684
        # skip non-healthy nodes
1685
        continue
1686
      for volume in n_img.volumes:
1687
        test = ((node not in node_vol_should or
1688
                volume not in node_vol_should[node]) and
1689
                not reserved.Matches(volume))
1690
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1691
                      "volume %s is unknown", volume)
1692

    
1693
  def _VerifyOrphanInstances(self, instancelist, node_image):
1694
    """Verify the list of running instances.
1695

1696
    This checks what instances are running but unknown to the cluster.
1697

1698
    """
1699
    for node, n_img in node_image.items():
1700
      for o_inst in n_img.instances:
1701
        test = o_inst not in instancelist
1702
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1703
                      "instance %s on node %s should not exist", o_inst, node)
1704

    
1705
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1706
    """Verify N+1 Memory Resilience.
1707

1708
    Check that if one single node dies we can still start all the
1709
    instances it was primary for.
1710

1711
    """
1712
    for node, n_img in node_image.items():
1713
      # This code checks that every node which is now listed as
1714
      # secondary has enough memory to host all instances it is
1715
      # supposed to, should a single other node in the cluster fail.
1716
      # FIXME: not ready for failover to an arbitrary node
1717
      # FIXME: does not support file-backed instances
1718
      # WARNING: we currently take into account down instances as well
1719
      # as up ones, considering that even if they're down someone
1720
      # might want to start them even in the event of a node failure.
1721
      for prinode, instances in n_img.sbp.items():
1722
        needed_mem = 0
1723
        for instance in instances:
1724
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1725
          if bep[constants.BE_AUTO_BALANCE]:
1726
            needed_mem += bep[constants.BE_MEMORY]
1727
        test = n_img.mfree < needed_mem
1728
        self._ErrorIf(test, self.ENODEN1, node,
1729
                      "not enough memory to accomodate instance failovers"
1730
                      " should node %s fail", prinode)
1731
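  # Illustrative sketch (not part of the original module): for each node the
  # loop above sums the memory of the auto-balanced instances that would fail
  # over to it from every primary, and compares the sum with the node's free
  # memory.  The core test in isolation (the helper name and the accessor
  # callbacks are made up):
  #
  #   def _NPlusOneFailures(mfree, sbp, mem_for, auto_balance):
  #     """Return the primaries whose failover would not fit into mfree."""
  #     failing = []
  #     for prinode, instances in sbp.items():
  #       needed = sum(mem_for(i) for i in instances if auto_balance(i))
  #       if mfree < needed:
  #         failing.append(prinode)
  #     return failing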

    
1732
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1733
                       master_files):
1734
    """Verifies and computes the node required file checksums.
1735

1736
    @type ninfo: L{objects.Node}
1737
    @param ninfo: the node to check
1738
    @param nresult: the remote results for the node
1739
    @param file_list: required list of files
1740
    @param local_cksum: dictionary of local files and their checksums
1741
    @param master_files: list of files that only masters should have
1742

1743
    """
1744
    node = ninfo.name
1745
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1746

    
1747
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1748
    test = not isinstance(remote_cksum, dict)
1749
    _ErrorIf(test, self.ENODEFILECHECK, node,
1750
             "node hasn't returned file checksum data")
1751
    if test:
1752
      return
1753

    
1754
    for file_name in file_list:
1755
      node_is_mc = ninfo.master_candidate
1756
      must_have = (file_name not in master_files) or node_is_mc
1757
      # missing
1758
      test1 = file_name not in remote_cksum
1759
      # invalid checksum
1760
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1761
      # existing and good
1762
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1763
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1764
               "file '%s' missing", file_name)
1765
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1766
               "file '%s' has wrong checksum", file_name)
1767
      # not candidate and this is not a must-have file
1768
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1769
               "file '%s' should not exist on non master"
1770
               " candidates (and the file is outdated)", file_name)
1771
      # all good, except non-master/non-must have combination
1772
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1773
               "file '%s' should not exist"
1774
               " on non master candidates", file_name)
1775
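  # Illustrative sketch (not part of the original module): per file the checks
  # above distinguish "missing", "wrong checksum" and "present and correct",
  # and only complain when that state conflicts with whether the node must
  # have the file (must_have: not a master-only file, or the node is a master
  # candidate).  The classification itself in isolation:
  #
  #   def _ClassifyFile(file_name, remote_cksum, local_cksum):
  #     """Return one of "missing", "mismatch" or "ok"."""
  #     if file_name not in remote_cksum:
  #       return "missing"
  #     if remote_cksum[file_name] != local_cksum[file_name]:
  #       return "mismatch"
  #     return "ok"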

    
1776
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1777
                      drbd_map):
1778
    """Verifies and the node DRBD status.
1779

1780
    @type ninfo: L{objects.Node}
1781
    @param ninfo: the node to check
1782
    @param nresult: the remote results for the node
1783
    @param instanceinfo: the dict of instances
1784
    @param drbd_helper: the configured DRBD usermode helper
1785
    @param drbd_map: the DRBD map as returned by
1786
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1787

1788
    """
1789
    node = ninfo.name
1790
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1791

    
1792
    if drbd_helper:
1793
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1794
      test = (helper_result is None)
1795
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1796
               "no drbd usermode helper returned")
1797
      if helper_result:
1798
        status, payload = helper_result
1799
        test = not status
1800
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1801
                 "drbd usermode helper check unsuccessful: %s", payload)
1802
        test = status and (payload != drbd_helper)
1803
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1804
                 "wrong drbd usermode helper: %s", payload)
1805

    
1806
    # compute the DRBD minors
1807
    node_drbd = {}
1808
    for minor, instance in drbd_map[node].items():
1809
      test = instance not in instanceinfo
1810
      _ErrorIf(test, self.ECLUSTERCFG, None,
1811
               "ghost instance '%s' in temporary DRBD map", instance)
1812
        # ghost instance should not be running, but otherwise we
1813
        # don't give double warnings (both ghost instance and
1814
        # unallocated minor in use)
1815
      if test:
1816
        node_drbd[minor] = (instance, False)
1817
      else:
1818
        instance = instanceinfo[instance]
1819
        node_drbd[minor] = (instance.name, instance.admin_up)
1820

    
1821
    # and now check them
1822
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1823
    test = not isinstance(used_minors, (tuple, list))
1824
    _ErrorIf(test, self.ENODEDRBD, node,
1825
             "cannot parse drbd status file: %s", str(used_minors))
1826
    if test:
1827
      # we cannot check drbd status
1828
      return
1829

    
1830
    for minor, (iname, must_exist) in node_drbd.items():
1831
      test = minor not in used_minors and must_exist
1832
      _ErrorIf(test, self.ENODEDRBD, node,
1833
               "drbd minor %d of instance %s is not active", minor, iname)
1834
    for minor in used_minors:
1835
      test = minor not in node_drbd
1836
      _ErrorIf(test, self.ENODEDRBD, node,
1837
               "unallocated drbd minor %d is in use", minor)
1838
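  # Illustrative sketch (not part of the original module): the two loops above
  # amount to a set comparison between the minors the configuration expects on
  # this node and the minors the node reports as in use.  The same comparison
  # in isolation (the helper name is made up):
  #
  #   def _CompareMinors(node_drbd, used_minors):
  #     """Return (missing, unallocated) lists of DRBD minors."""
  #     missing = [minor for minor, (_, must_exist) in node_drbd.items()
  #                if must_exist and minor not in used_minors]
  #     unallocated = [minor for minor in used_minors
  #                    if minor not in node_drbd]
  #     return (missing, unallocated)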

    
1839
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1840
    """Builds the node OS structures.
1841

1842
    @type ninfo: L{objects.Node}
1843
    @param ninfo: the node to check
1844
    @param nresult: the remote results for the node
1845
    @param nimg: the node image object
1846

1847
    """
1848
    node = ninfo.name
1849
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1850

    
1851
    remote_os = nresult.get(constants.NV_OSLIST, None)
1852
    test = (not isinstance(remote_os, list) or
1853
            not compat.all(isinstance(v, list) and len(v) == 7
1854
                           for v in remote_os))
1855

    
1856
    _ErrorIf(test, self.ENODEOS, node,
1857
             "node hasn't returned valid OS data")
1858

    
1859
    nimg.os_fail = test
1860

    
1861
    if test:
1862
      return
1863

    
1864
    os_dict = {}
1865

    
1866
    for (name, os_path, status, diagnose,
1867
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1868

    
1869
      if name not in os_dict:
1870
        os_dict[name] = []
1871

    
1872
      # parameters is a list of lists instead of list of tuples due to
1873
      # JSON lacking a real tuple type, fix it:
1874
      parameters = [tuple(v) for v in parameters]
1875
      os_dict[name].append((os_path, status, diagnose,
1876
                            set(variants), set(parameters), set(api_ver)))
1877

    
1878
    nimg.oslist = os_dict
1879
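  # Illustrative sketch (not part of the original module): the loop above
  # groups the 7-element NV_OSLIST rows by OS name and converts the variants,
  # parameters and API versions to sets so that _VerifyNodeOS can compare them
  # against the reference node.  With a single made-up row:
  #
  #   row = ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
  #          ["default"], [["ARG", "doc"]], [15]]
  #   (name, os_path, status, diagnose,
  #    variants, parameters, api_ver) = row
  #   entry = (os_path, status, diagnose, set(variants),
  #            set(tuple(v) for v in parameters), set(api_ver))
  #   os_dict = {name: [entry]}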

    
1880
  def _VerifyNodeOS(self, ninfo, nimg, base):
1881
    """Verifies the node OS list.
1882

1883
    @type ninfo: L{objects.Node}
1884
    @param ninfo: the node to check
1885
    @param nimg: the node image object
1886
    @param base: the 'template' node we match against (e.g. from the master)
1887

1888
    """
1889
    node = ninfo.name
1890
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1891

    
1892
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1893

    
1894
    for os_name, os_data in nimg.oslist.items():
1895
      assert os_data, "Empty OS status for OS %s?!" % os_name
1896
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1897
      _ErrorIf(not f_status, self.ENODEOS, node,
1898
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1899
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1900
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1901
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1902
      # this will be caught in the backend too
1903
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1904
               and not f_var, self.ENODEOS, node,
1905
               "OS %s with API at least %d does not declare any variant",
1906
               os_name, constants.OS_API_V15)
1907
      # comparisons with the 'base' image
1908
      test = os_name not in base.oslist
1909
      _ErrorIf(test, self.ENODEOS, node,
1910
               "Extra OS %s not present on reference node (%s)",
1911
               os_name, base.name)
1912
      if test:
1913
        continue
1914
      assert base.oslist[os_name], "Base node has empty OS status?"
1915
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1916
      if not b_status:
1917
        # base OS is invalid, skipping
1918
        continue
1919
      for kind, a, b in [("API version", f_api, b_api),
1920
                         ("variants list", f_var, b_var),
1921
                         ("parameters", f_param, b_param)]:
1922
        _ErrorIf(a != b, self.ENODEOS, node,
1923
                 "OS %s %s differs from reference node %s: %s vs. %s",
1924
                 kind, os_name, base.name,
1925
                 utils.CommaJoin(a), utils.CommaJoin(b))
1926

    
1927
    # check any missing OSes
1928
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1929
    _ErrorIf(missing, self.ENODEOS, node,
1930
             "OSes present on reference node %s but missing on this node: %s",
1931
             base.name, utils.CommaJoin(missing))
1932

    
1933
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1934
    """Verifies and updates the node volume data.
1935

1936
    This function will update a L{NodeImage}'s internal structures
1937
    with data from the remote call.
1938

1939
    @type ninfo: L{objects.Node}
1940
    @param ninfo: the node to check
1941
    @param nresult: the remote results for the node
1942
    @param nimg: the node image object
1943
    @param vg_name: the configured VG name
1944

1945
    """
1946
    node = ninfo.name
1947
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1948

    
1949
    nimg.lvm_fail = True
1950
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1951
    if vg_name is None:
1952
      pass
1953
    elif isinstance(lvdata, basestring):
1954
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1955
               utils.SafeEncode(lvdata))
1956
    elif not isinstance(lvdata, dict):
1957
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1958
    else:
1959
      nimg.volumes = lvdata
1960
      nimg.lvm_fail = False
1961

    
1962
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1963
    """Verifies and updates the node instance list.
1964

1965
    If the listing was successful, then updates this node's instance
1966
    list. Otherwise, it marks the RPC call as failed for the instance
1967
    list key.
1968

1969
    @type ninfo: L{objects.Node}
1970
    @param ninfo: the node to check
1971
    @param nresult: the remote results for the node
1972
    @param nimg: the node image object
1973

1974
    """
1975
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1976
    test = not isinstance(idata, list)
1977
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1978
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1979
    if test:
1980
      nimg.hyp_fail = True
1981
    else:
1982
      nimg.instances = idata
1983

    
1984
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1985
    """Verifies and computes a node information map
1986

1987
    @type ninfo: L{objects.Node}
1988
    @param ninfo: the node to check
1989
    @param nresult: the remote results for the node
1990
    @param nimg: the node image object
1991
    @param vg_name: the configured VG name
1992

1993
    """
1994
    node = ninfo.name
1995
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1996

    
1997
    # try to read free memory (from the hypervisor)
1998
    hv_info = nresult.get(constants.NV_HVINFO, None)
1999
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2000
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2001
    if not test:
2002
      try:
2003
        nimg.mfree = int(hv_info["memory_free"])
2004
      except (ValueError, TypeError):
2005
        _ErrorIf(True, self.ENODERPC, node,
2006
                 "node returned invalid nodeinfo, check hypervisor")
2007

    
2008
    # FIXME: devise a free space model for file based instances as well
2009
    if vg_name is not None:
2010
      test = (constants.NV_VGLIST not in nresult or
2011
              vg_name not in nresult[constants.NV_VGLIST])
2012
      _ErrorIf(test, self.ENODELVM, node,
2013
               "node didn't return data for the volume group '%s'"
2014
               " - it is either missing or broken", vg_name)
2015
      if not test:
2016
        try:
2017
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2018
        except (ValueError, TypeError):
2019
          _ErrorIf(True, self.ENODERPC, node,
2020
                   "node returned invalid LVM info, check LVM status")
2021

    
2022
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2023
    """Gets per-disk status information for all instances.
2024

2025
    @type nodelist: list of strings
2026
    @param nodelist: Node names
2027
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node image objects
2029
    @type instanceinfo: dict of (name, L{objects.Instance})
2030
    @param instanceinfo: Instance objects
2031
    @rtype: {instance: {node: [(success, payload)]}}
2032
    @return: a dictionary of per-instance dictionaries with nodes as
2033
        keys and disk information as values; the disk information is a
2034
        list of tuples (success, payload)
2035

2036
    """
2037
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2038

    
2039
    node_disks = {}
2040
    node_disks_devonly = {}
2041
    diskless_instances = set()
2042
    diskless = constants.DT_DISKLESS
2043

    
2044
    for nname in nodelist:
2045
      node_instances = list(itertools.chain(node_image[nname].pinst,
2046
                                            node_image[nname].sinst))
2047
      diskless_instances.update(inst for inst in node_instances
2048
                                if instanceinfo[inst].disk_template == diskless)
2049
      disks = [(inst, disk)
2050
               for inst in node_instances
2051
               for disk in instanceinfo[inst].disks]
2052

    
2053
      if not disks:
2054
        # No need to collect data
2055
        continue
2056

    
2057
      node_disks[nname] = disks
2058

    
2059
      # Creating copies as SetDiskID below will modify the objects and that can
2060
      # lead to incorrect data returned from nodes
2061
      devonly = [dev.Copy() for (_, dev) in disks]
2062

    
2063
      for dev in devonly:
2064
        self.cfg.SetDiskID(dev, nname)
2065

    
2066
      node_disks_devonly[nname] = devonly
2067

    
2068
    assert len(node_disks) == len(node_disks_devonly)
2069

    
2070
    # Collect data from all nodes with disks
2071
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2072
                                                          node_disks_devonly)
2073

    
2074
    assert len(result) == len(node_disks)
2075

    
2076
    instdisk = {}
2077

    
2078
    for (nname, nres) in result.items():
2079
      disks = node_disks[nname]
2080

    
2081
      if nres.offline:
2082
        # No data from this node
2083
        data = len(disks) * [(False, "node offline")]
2084
      else:
2085
        msg = nres.fail_msg
2086
        _ErrorIf(msg, self.ENODERPC, nname,
2087
                 "while getting disk information: %s", msg)
2088
        if msg:
2089
          # No data from this node
2090
          data = len(disks) * [(False, msg)]
2091
        else:
2092
          data = []
2093
          for idx, i in enumerate(nres.payload):
2094
            if isinstance(i, (tuple, list)) and len(i) == 2:
2095
              data.append(i)
2096
            else:
2097
              logging.warning("Invalid result from node %s, entry %d: %s",
2098
                              nname, idx, i)
2099
              data.append((False, "Invalid result from the remote node"))
2100

    
2101
      for ((inst, _), status) in zip(disks, data):
2102
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2103

    
2104
    # Add empty entries for diskless instances.
2105
    for inst in diskless_instances:
2106
      assert inst not in instdisk
2107
      instdisk[inst] = {}
2108

    
2109
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2110
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2111
                      compat.all(isinstance(s, (tuple, list)) and
2112
                                 len(s) == 2 for s in statuses)
2113
                      for inst, nnames in instdisk.items()
2114
                      for nname, statuses in nnames.items())
2115
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2116

    
2117
    return instdisk
2118
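  # Illustrative sketch (not part of the original module): the instdisk value
  # returned above is a two-level dict with one (success, payload) tuple per
  # disk, per node, per instance -- the payload being a block device status on
  # success or an error string on failure -- and an empty dict for diskless
  # instances, e.g. (made-up names, status payloads elided):
  #
  #   instdisk = {
  #     "inst1": {"node1": [(True, status0), (False, "node offline")]},
  #     "inst2": {"node2": [(True, status0)]},
  #     "diskless1": {},
  #     }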

    
2119
  def BuildHooksEnv(self):
2120
    """Build hooks env.
2121

2122
    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.
2124

2125
    """
2126
    all_nodes = self.cfg.GetNodeList()
2127
    env = {
2128
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2129
      }
2130
    for node in self.cfg.GetAllNodesInfo().values():
2131
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2132

    
2133
    return env, [], all_nodes
2134

    
2135
  def Exec(self, feedback_fn):
2136
    """Verify integrity of cluster, performing various test on nodes.
2137

2138
    """
2139
    self.bad = False
2140
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2141
    verbose = self.op.verbose
2142
    self._feedback_fn = feedback_fn
2143
    feedback_fn("* Verifying global settings")
2144
    for msg in self.cfg.VerifyConfig():
2145
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2146

    
2147
    # Check the cluster certificates
2148
    for cert_filename in constants.ALL_CERT_FILES:
2149
      (errcode, msg) = _VerifyCertificate(cert_filename)
2150
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2151

    
2152
    vg_name = self.cfg.GetVGName()
2153
    drbd_helper = self.cfg.GetDRBDHelper()
2154
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2155
    cluster = self.cfg.GetClusterInfo()
2156
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2157
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2158
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2159
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2160
                        for iname in instancelist)
2161
    i_non_redundant = [] # Non redundant instances
2162
    i_non_a_balanced = [] # Non auto-balanced instances
2163
    n_offline = 0 # Count of offline nodes
2164
    n_drained = 0 # Count of nodes being drained
2165
    node_vol_should = {}
2166

    
2167
    # FIXME: verify OS list
2168
    # do local checksums
2169
    master_files = [constants.CLUSTER_CONF_FILE]
2170
    master_node = self.master_node = self.cfg.GetMasterNode()
2171
    master_ip = self.cfg.GetMasterIP()
2172

    
2173
    file_names = ssconf.SimpleStore().GetFileList()
2174
    file_names.extend(constants.ALL_CERT_FILES)
2175
    file_names.extend(master_files)
2176
    if cluster.modify_etc_hosts:
2177
      file_names.append(constants.ETC_HOSTS)
2178

    
2179
    local_checksums = utils.FingerprintFiles(file_names)
2180

    
2181
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2182
    node_verify_param = {
2183
      constants.NV_FILELIST: file_names,
2184
      constants.NV_NODELIST: [node.name for node in nodeinfo
2185
                              if not node.offline],
2186
      constants.NV_HYPERVISOR: hypervisors,
2187
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2188
                                  node.secondary_ip) for node in nodeinfo
2189
                                 if not node.offline],
2190
      constants.NV_INSTANCELIST: hypervisors,
2191
      constants.NV_VERSION: None,
2192
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2193
      constants.NV_NODESETUP: None,
2194
      constants.NV_TIME: None,
2195
      constants.NV_MASTERIP: (master_node, master_ip),
2196
      constants.NV_OSLIST: None,
2197
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2198
      }
2199

    
2200
    if vg_name is not None:
2201
      node_verify_param[constants.NV_VGLIST] = None
2202
      node_verify_param[constants.NV_LVLIST] = vg_name
2203
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2204
      node_verify_param[constants.NV_DRBDLIST] = None
2205

    
2206
    if drbd_helper:
2207
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2208

    
2209
    # Build our expected cluster state
2210
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2211
                                                 name=node.name,
2212
                                                 vm_capable=node.vm_capable))
2213
                      for node in nodeinfo)
2214

    
2215
    for instance in instancelist:
2216
      inst_config = instanceinfo[instance]
2217

    
2218
      for nname in inst_config.all_nodes:
2219
        if nname not in node_image:
2220
          # ghost node
2221
          gnode = self.NodeImage(name=nname)
2222
          gnode.ghost = True
2223
          node_image[nname] = gnode
2224

    
2225
      inst_config.MapLVsByNode(node_vol_should)
2226

    
2227
      pnode = inst_config.primary_node
2228
      node_image[pnode].pinst.append(instance)
2229

    
2230
      for snode in inst_config.secondary_nodes:
2231
        nimg = node_image[snode]
2232
        nimg.sinst.append(instance)
2233
        if pnode not in nimg.sbp:
2234
          nimg.sbp[pnode] = []
2235
        nimg.sbp[pnode].append(instance)
2236

    
2237
    # At this point, we have the in-memory data structures complete,
2238
    # except for the runtime information, which we'll gather next
2239

    
2240
    # Due to the way our RPC system works, exact response times cannot be
2241
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2242
    # time before and after executing the request, we can at least have a time
2243
    # window.
2244
    nvinfo_starttime = time.time()
2245
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2246
                                           self.cfg.GetClusterName())
2247
    nvinfo_endtime = time.time()
2248

    
2249
    all_drbd_map = self.cfg.ComputeDRBDMap()
2250

    
2251
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2252
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2253

    
2254
    feedback_fn("* Verifying node status")
2255

    
2256
    refos_img = None
2257

    
2258
    for node_i in nodeinfo:
2259
      node = node_i.name
2260
      nimg = node_image[node]
2261

    
2262
      if node_i.offline:
2263
        if verbose:
2264
          feedback_fn("* Skipping offline node %s" % (node,))
2265
        n_offline += 1
2266
        continue
2267

    
2268
      if node == master_node:
2269
        ntype = "master"
2270
      elif node_i.master_candidate:
2271
        ntype = "master candidate"
2272
      elif node_i.drained:
2273
        ntype = "drained"
2274
        n_drained += 1
2275
      else:
2276
        ntype = "regular"
2277
      if verbose:
2278
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2279

    
2280
      msg = all_nvinfo[node].fail_msg
2281
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2282
      if msg:
2283
        nimg.rpc_fail = True
2284
        continue
2285

    
2286
      nresult = all_nvinfo[node].payload
2287

    
2288
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2289
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2290
      self._VerifyNodeNetwork(node_i, nresult)
2291
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2292
                            master_files)
2293

    
2294
      if nimg.vm_capable:
2295
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2296
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2297
                             all_drbd_map)
2298

    
2299
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2300
        self._UpdateNodeInstances(node_i, nresult, nimg)
2301
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2302
        self._UpdateNodeOS(node_i, nresult, nimg)
2303
        if not nimg.os_fail:
2304
          if refos_img is None:
2305
            refos_img = nimg
2306
          self._VerifyNodeOS(node_i, nimg, refos_img)
2307

    
2308
    feedback_fn("* Verifying instance status")
2309
    for instance in instancelist:
2310
      if verbose:
2311
        feedback_fn("* Verifying instance %s" % instance)
2312
      inst_config = instanceinfo[instance]
2313
      self._VerifyInstance(instance, inst_config, node_image,
2314
                           instdisk[instance])
2315
      inst_nodes_offline = []
2316

    
2317
      pnode = inst_config.primary_node
2318
      pnode_img = node_image[pnode]
2319
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2320
               self.ENODERPC, pnode, "instance %s, connection to"
2321
               " primary node failed", instance)
2322

    
2323
      if pnode_img.offline:
2324
        inst_nodes_offline.append(pnode)
2325

    
2326
      # If the instance is non-redundant we cannot survive losing its primary
2327
      # node, so we are not N+1 compliant. On the other hand we have no disk
2328
      # templates with more than one secondary so that situation is not well
2329
      # supported either.
2330
      # FIXME: does not support file-backed instances
2331
      if not inst_config.secondary_nodes:
2332
        i_non_redundant.append(instance)
2333
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2334
               instance, "instance has multiple secondary nodes: %s",
2335
               utils.CommaJoin(inst_config.secondary_nodes),
2336
               code=self.ETYPE_WARNING)
2337

    
2338
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2339
        i_non_a_balanced.append(instance)
2340

    
2341
      for snode in inst_config.secondary_nodes:
2342
        s_img = node_image[snode]
2343
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2344
                 "instance %s, connection to secondary node failed", instance)
2345

    
2346
        if s_img.offline:
2347
          inst_nodes_offline.append(snode)
2348

    
2349
      # warn that the instance lives on offline nodes
2350
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2351
               "instance lives on offline node(s) %s",
2352
               utils.CommaJoin(inst_nodes_offline))
2353
      # ... or ghost/non-vm_capable nodes
2354
      for node in inst_config.all_nodes:
2355
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2356
                 "instance lives on ghost node %s", node)
2357
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2358
                 instance, "instance lives on non-vm_capable node %s", node)
2359

    
2360
    feedback_fn("* Verifying orphan volumes")
2361
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2362
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2363

    
2364
    feedback_fn("* Verifying orphan instances")
2365
    self._VerifyOrphanInstances(instancelist, node_image)
2366

    
2367
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2368
      feedback_fn("* Verifying N+1 Memory redundancy")
2369
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2370

    
2371
    feedback_fn("* Other Notes")
2372
    if i_non_redundant:
2373
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2374
                  % len(i_non_redundant))
2375

    
2376
    if i_non_a_balanced:
2377
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2378
                  % len(i_non_a_balanced))
2379

    
2380
    if n_offline:
2381
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2382

    
2383
    if n_drained:
2384
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2385

    
2386
    return not self.bad
2387

    
2388
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2389
    """Analyze the post-hooks' result
2390

2391
    This method analyses the hook result, handles it, and sends some
2392
    nicely-formatted feedback back to the user.
2393

2394
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2395
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2396
    @param hooks_results: the results of the multi-node hooks rpc call
2397
    @param feedback_fn: function used to send feedback back to the caller
2398
    @param lu_result: previous Exec result
2399
    @return: the new Exec result, based on the previous result
2400
        and hook results
2401

2402
    """
2403
    # We only really run POST phase hooks, and are only interested in
2404
    # their results
2405
    if phase == constants.HOOKS_PHASE_POST:
2406
      # Used to change hooks' output to proper indentation
2407
      feedback_fn("* Hooks Results")
2408
      assert hooks_results, "invalid result from hooks"
2409

    
2410
      for node_name in hooks_results:
2411
        res = hooks_results[node_name]
2412
        msg = res.fail_msg
2413
        test = msg and not res.offline
2414
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2415
                      "Communication failure in hooks execution: %s", msg)
2416
        if res.offline or msg:
2417
          # No need to investigate payload if node is offline or gave an error.
2418
          # override manually lu_result here as _ErrorIf only
2419
          # overrides self.bad
2420
          lu_result = 1
2421
          continue
2422
        for script, hkr, output in res.payload:
2423
          test = hkr == constants.HKR_FAIL
2424
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2425
                        "Script %s failed, output:", script)
2426
          if test:
2427
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2428
            feedback_fn("%s" % output)
2429
            lu_result = 0
2430

    
2431
      return lu_result
2432

    
2433

    
2434
class LUVerifyDisks(NoHooksLU):
2435
  """Verifies the cluster disks status.
2436

2437
  """
2438
  REQ_BGL = False
2439

    
2440
  def ExpandNames(self):
2441
    self.needed_locks = {
2442
      locking.LEVEL_NODE: locking.ALL_SET,
2443
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2444
    }
2445
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2446

    
2447
  def Exec(self, feedback_fn):
2448
    """Verify integrity of cluster disks.
2449

2450
    @rtype: tuple of three items
2451
    @return: a tuple of (dict of node-to-node_error, list of instances
2452
        which need activate-disks, dict of instance: (node, volume) for
2453
        missing volumes)
2454

2455
    """
2456
    result = res_nodes, res_instances, res_missing = {}, [], {}
2457

    
2458
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2459
    instances = [self.cfg.GetInstanceInfo(name)
2460
                 for name in self.cfg.GetInstanceList()]
2461

    
2462
    nv_dict = {}
2463
    for inst in instances:
2464
      inst_lvs = {}
2465
      if (not inst.admin_up or
2466
          inst.disk_template not in constants.DTS_NET_MIRROR):
2467
        continue
2468
      inst.MapLVsByNode(inst_lvs)
2469
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2470
      for node, vol_list in inst_lvs.iteritems():
2471
        for vol in vol_list:
2472
          nv_dict[(node, vol)] = inst
2473
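    # Illustrative sketch (not part of the original module): the inversion
    # above turns each instance's per-node LV map into a flat
    # (node, volume) -> instance lookup, e.g. with made-up names:
    #
    #   inst = "instance1.example.com"
    #   inst_lvs = {"node1": ["xenvg/lv1", "xenvg/lv2"],
    #               "node2": ["xenvg/lv1"]}
    #   nv_dict = {}
    #   for node, vol_list in inst_lvs.iteritems():
    #     for vol in vol_list:
    #       nv_dict[(node, vol)] = inst
    #   # -> {("node1", "xenvg/lv1"): inst, ("node1", "xenvg/lv2"): inst,
    #   #     ("node2", "xenvg/lv1"): inst}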

    
2474
    if not nv_dict:
2475
      return result
2476

    
2477
    vg_names = self.rpc.call_vg_list(nodes)
2478
    vg_names.Raise("Cannot get list of VGs")
2479

    
2480
    for node in nodes:
2481
      # node_volume
2482
      node_res = self.rpc.call_lv_list([node],
2483
                                       vg_names[node].payload.keys())[node]
2484
      if node_res.offline:
2485
        continue
2486
      msg = node_res.fail_msg
2487
      if msg:
2488
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2489
        res_nodes[node] = msg
2490
        continue
2491

    
2492
      lvs = node_res.payload
2493
      for lv_name, (_, _, lv_online) in lvs.items():
2494
        inst = nv_dict.pop((node, lv_name), None)
2495
        if (not lv_online and inst is not None
2496
            and inst.name not in res_instances):
2497
          res_instances.append(inst.name)
2498

    
2499
    # any leftover items in nv_dict are missing LVs, let's arrange the
2500
    # data better
2501
    for key, inst in nv_dict.iteritems():
2502
      if inst.name not in res_missing:
2503
        res_missing[inst.name] = []
2504
      res_missing[inst.name].append(key)
2505

    
2506
    return result
2507

    
2508

    
2509
class LURepairDiskSizes(NoHooksLU):
2510
  """Verifies the cluster disks sizes.
2511

2512
  """
2513
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2514
  REQ_BGL = False
2515

    
2516
  def ExpandNames(self):
2517
    if self.op.instances:
2518
      self.wanted_names = []
2519
      for name in self.op.instances:
2520
        full_name = _ExpandInstanceName(self.cfg, name)
2521
        self.wanted_names.append(full_name)
2522
      self.needed_locks = {
2523
        locking.LEVEL_NODE: [],
2524
        locking.LEVEL_INSTANCE: self.wanted_names,
2525
        }
2526
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2527
    else:
2528
      self.wanted_names = None
2529
      self.needed_locks = {
2530
        locking.LEVEL_NODE: locking.ALL_SET,
2531
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2532
        }
2533
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2534

    
2535
  def DeclareLocks(self, level):
2536
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2537
      self._LockInstancesNodes(primary_only=True)
2538

    
2539
  def CheckPrereq(self):
2540
    """Check prerequisites.
2541

2542
    This only checks the optional instance list against the existing names.
2543

2544
    """
2545
    if self.wanted_names is None:
2546
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2547

    
2548
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2549
                             in self.wanted_names]
2550

    
2551
  def _EnsureChildSizes(self, disk):
2552
    """Ensure children of the disk have the needed disk size.
2553

2554
    This is valid mainly for DRBD8 and fixes an issue where the
2555
    children have a smaller disk size.
2556

2557
    @param disk: an L{ganeti.objects.Disk} object
2558

2559
    """
2560
    if disk.dev_type == constants.LD_DRBD8:
2561
      assert disk.children, "Empty children for DRBD8?"
2562
      fchild = disk.children[0]
2563
      mismatch = fchild.size < disk.size
2564
      if mismatch:
2565
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2566
                     fchild.size, disk.size)
2567
        fchild.size = disk.size
2568

    
2569
      # and we recurse on this child only, not on the metadev
2570
      return self._EnsureChildSizes(fchild) or mismatch
2571
    else:
2572
      return False
2573
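  # Illustrative sketch (not part of the original module): for DRBD8 the
  # method recurses on the data child only (children[0]), never on the
  # metadata child, and reports whether any size had to be adjusted.  A
  # simplified standalone equivalent over plain {"size", "children"} dicts
  # (not Ganeti's Disk objects):
  #
  #   def _GrowFirstChild(size, children):
  #     """Return True if any first-child size had to be bumped to 'size'."""
  #     if not children:
  #       return False
  #     child = children[0]
  #     mismatch = child["size"] < size
  #     if mismatch:
  #       child["size"] = size
  #     rec = _GrowFirstChild(child["size"], child.get("children", []))
  #     return rec or mismatch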

    
2574
  def Exec(self, feedback_fn):
2575
    """Verify the size of cluster disks.
2576

2577
    """
2578
    # TODO: check child disks too
2579
    # TODO: check differences in size between primary/secondary nodes
2580
    per_node_disks = {}
2581
    for instance in self.wanted_instances:
2582
      pnode = instance.primary_node
2583
      if pnode not in per_node_disks:
2584
        per_node_disks[pnode] = []
2585
      for idx, disk in enumerate(instance.disks):
2586
        per_node_disks[pnode].append((instance, idx, disk))
2587

    
2588
    changed = []
2589
    for node, dskl in per_node_disks.items():
2590
      newl = [v[2].Copy() for v in dskl]
2591
      for dsk in newl:
2592
        self.cfg.SetDiskID(dsk, node)
2593
      result = self.rpc.call_blockdev_getsizes(node, newl)
2594
      if result.fail_msg:
2595
        self.LogWarning("Failure in blockdev_getsizes call to node"
2596
                        " %s, ignoring", node)
2597
        continue
2598
      if len(result.data) != len(dskl):
2599
        self.LogWarning("Invalid result from node %s, ignoring node results",
2600
                        node)
2601
        continue
2602
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2603
        if size is None:
2604
          self.LogWarning("Disk %d of instance %s did not return size"
2605
                          " information, ignoring", idx, instance.name)
2606
          continue
2607
        if not isinstance(size, (int, long)):
2608
          self.LogWarning("Disk %d of instance %s did not return valid"
2609
                          " size information, ignoring", idx, instance.name)
2610
          continue
2611
        size = size >> 20
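        # Illustrative note (not part of the original module): the node
        # reports sizes in bytes, so the shift converts them to mebibytes
        # before comparing with disk.size, e.g.:
        #   (10737418240 >> 20) == 10240   # a 10 GiB volume expressed in MiB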
2612
        if size != disk.size:
2613
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2614
                       " correcting: recorded %d, actual %d", idx,
2615
                       instance.name, disk.size, size)
2616
          disk.size = size
2617
          self.cfg.Update(instance, feedback_fn)
2618
          changed.append((instance.name, idx, size))
2619
        if self._EnsureChildSizes(disk):
2620
          self.cfg.Update(instance, feedback_fn)
2621
          changed.append((instance.name, idx, disk.size))
2622
    return changed
2623

    
2624

    
2625
class LURenameCluster(LogicalUnit):
2626
  """Rename the cluster.
2627

2628
  """
2629
  HPATH = "cluster-rename"
2630
  HTYPE = constants.HTYPE_CLUSTER
2631
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2632

    
2633
  def BuildHooksEnv(self):
2634
    """Build hooks env.
2635

2636
    """
2637
    env = {
2638
      "OP_TARGET": self.cfg.GetClusterName(),
2639
      "NEW_NAME": self.op.name,
2640
      }
2641
    mn = self.cfg.GetMasterNode()
2642
    all_nodes = self.cfg.GetNodeList()
2643
    return env, [mn], all_nodes
2644

    
2645
  def CheckPrereq(self):
2646
    """Verify that the passed name is a valid one.
2647

2648
    """
2649
    hostname = netutils.GetHostname(name=self.op.name,
2650
                                    family=self.cfg.GetPrimaryIPFamily())
2651

    
2652
    new_name = hostname.name
2653
    self.ip = new_ip = hostname.ip
2654
    old_name = self.cfg.GetClusterName()
2655
    old_ip = self.cfg.GetMasterIP()
2656
    if new_name == old_name and new_ip == old_ip:
2657
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2658
                                 " cluster has changed",
2659
                                 errors.ECODE_INVAL)
2660
    if new_ip != old_ip:
2661
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2662
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2663
                                   " reachable on the network" %
2664
                                   new_ip, errors.ECODE_NOTUNIQUE)
2665

    
2666
    self.op.name = new_name
2667

    
2668
  def Exec(self, feedback_fn):
2669
    """Rename the cluster.
2670

2671
    """
2672
    clustername = self.op.name
2673
    ip = self.ip
2674

    
2675
    # shutdown the master IP
2676
    master = self.cfg.GetMasterNode()
2677
    result = self.rpc.call_node_stop_master(master, False)
2678
    result.Raise("Could not disable the master role")
2679

    
2680
    try:
2681
      cluster = self.cfg.GetClusterInfo()
2682
      cluster.cluster_name = clustername
2683
      cluster.master_ip = ip
2684
      self.cfg.Update(cluster, feedback_fn)
2685

    
2686
      # update the known hosts file
2687
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2688
      node_list = self.cfg.GetOnlineNodeList()
2689
      try:
2690
        node_list.remove(master)
2691
      except ValueError:
2692
        pass
2693
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2694
    finally:
2695
      result = self.rpc.call_node_start_master(master, False, False)
2696
      msg = result.fail_msg
2697
      if msg:
2698
        self.LogWarning("Could not re-enable the master role on"
2699
                        " the master, please restart manually: %s", msg)
2700

    
2701
    return clustername
2702

    
2703

    
2704
class LUSetClusterParams(LogicalUnit):
2705
  """Change the parameters of the cluster.
2706

2707
  """
2708
  HPATH = "cluster-modify"
2709
  HTYPE = constants.HTYPE_CLUSTER
2710
  _OP_PARAMS = [
2711
    ("vg_name", None, ht.TMaybeString),
2712
    ("enabled_hypervisors", None,
2713
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2714
            ht.TNone)),
2715
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2716
                              ht.TNone)),
2717
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2718
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2719
                            ht.TNone)),
2720
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2721
                              ht.TNone)),
2722
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2723
    ("uid_pool", None, ht.NoType),
2724
    ("add_uids", None, ht.NoType),
2725
    ("remove_uids", None, ht.NoType),
2726
    ("maintain_node_health", None, ht.TMaybeBool),
2727
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
2728
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2729
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2730
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2731
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2732
    ("master_netdev", None, ht.TOr(ht.TString, ht.TNone)),
2733
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2734
    ("hidden_os", None, ht.TOr(ht.TListOf(\
2735
          ht.TAnd(ht.TList,
2736
                ht.TIsLength(2),
2737
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2738
          ht.TNone)),
2739
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2740
          ht.TAnd(ht.TList,
2741
                ht.TIsLength(2),
2742
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2743
          ht.TNone)),
2744
    ]
2745
  REQ_BGL = False
2746

    
2747
  def CheckArguments(self):
2748
    """Check parameters
2749

2750
    """
2751
    if self.op.uid_pool:
2752
      uidpool.CheckUidPool(self.op.uid_pool)
2753

    
2754
    if self.op.add_uids:
2755
      uidpool.CheckUidPool(self.op.add_uids)
2756

    
2757
    if self.op.remove_uids:
2758
      uidpool.CheckUidPool(self.op.remove_uids)
2759

    
2760
  def ExpandNames(self):
2761
    # FIXME: in the future maybe other cluster params won't require checking on
2762
    # all nodes to be modified.
2763
    self.needed_locks = {
2764
      locking.LEVEL_NODE: locking.ALL_SET,
2765
    }
2766
    self.share_locks[locking.LEVEL_NODE] = 1
2767

    
2768
  def BuildHooksEnv(self):
2769
    """Build hooks env.
2770

2771
    """
2772
    env = {
2773
      "OP_TARGET": self.cfg.GetClusterName(),
2774
      "NEW_VG_NAME": self.op.vg_name,
2775
      }
2776
    mn = self.cfg.GetMasterNode()
2777
    return env, [mn], [mn]
2778

    
2779
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)

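# Illustrative sketch (not part of the original code): _UploadHelper is meant
# to be called from within a logical unit, for example to push /etc/hosts to
# all online nodes:
#
#   _UploadHelper(self, self.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)
#
# Per-node failures are only logged as warnings, so a single unreachable node
# does not abort the whole operation.
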
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  if myself.name in vm_nodes:
    vm_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  vm_files = set()
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    vm_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    _UploadHelper(lu, dist_nodes, fname)
  for fname in vm_files:
    _UploadHelper(lu, vm_nodes, fname)

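# Illustrative sketch (not part of the original code): when a new node joins
# the cluster, the ancillary files are pushed to it explicitly, e.g.:
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)
#
# as done by LUAddNode.Exec further below.
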
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded

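# Illustrative sketch (not part of the original code): callers typically wait
# for an instance's disks right after creating or activating them, roughly:
#
#   disk_ok = _WaitForSync(self, instance)   # hypothetical variable name
#   if not disk_ok:
#     ...handle the degraded-disk case...
#
# A True return value means no disk was reported as (still) degraded.
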
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result

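# Illustrative sketch (not part of the original code): the check recurses into
# dev.children, so a single call such as
#
#   _CheckDiskConsistency(self, dev, node, on_primary=True)
#
# also covers the devices underneath a mirrored disk; the result is True only
# if every level of the device tree reports a healthy status.
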
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
    ("timeout", constants.OOB_TIMEOUT, ht.TInt),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)

    if node is None:
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)

    self.oob_program = _SupportsOob(self.cfg, node)

    if not self.oob_program:
      raise errors.OpPrereqError("OOB is not supported for node %s" %
                                 self.op.node_name)

    if self.op.command == constants.OOB_POWER_OFF and not node.offline:
      raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                  " not marked offline") % self.op.node_name)

    self.node = node

  def ExpandNames(self):
    """Gather locks we need.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.cfg.GetMasterNode()
    node = self.node

    logging.info("Executing out-of-band command '%s' using '%s' on %s",
                 self.op.command, self.oob_program, self.op.node_name)
    result = self.rpc.call_run_oob(master_node, self.oob_program,
                                   self.op.command, self.op.node_name,
                                   self.op.timeout)

    result.Raise("An error occurred on execution of OOB helper")

    self._CheckPayload(result)

    if self.op.command == constants.OOB_HEALTH:
      # For health we should log important events
      for item, status in result.payload:
        if status in [constants.OOB_STATUS_WARNING,
                      constants.OOB_STATUS_CRITICAL]:
          logging.warning("On node '%s' item '%s' has status '%s'",
                          self.op.node_name, item, status)

    if self.op.command == constants.OOB_POWER_ON:
      node.powered = True
    elif self.op.command == constants.OOB_POWER_OFF:
      node.powered = False
    elif self.op.command == constants.OOB_POWER_STATUS:
      powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
      if powered != self.node.powered:
        logging.warning(("Recorded power state (%s) of node '%s' does not match"
                         " actual power state (%s)"), node.powered,
                        self.op.node_name, powered)

    self.cfg.Update(node, feedback_fn)

    return result.payload

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      for item, status in result.payload:
        if status not in constants.OOB_STATUSES:
          errs.append("health item '%s' has invalid status '%s'" %
                      (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _HID = "hidden"
  _BLK = "blacklisted"
  _VLD = "valid"
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
                                   "parameters", "api_versions", _HID, _BLK)

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    cluster = self.cfg.GetClusterInfo()

    for os_name in utils.NiceSort(pol.keys()):
      os_data = pol[os_name]
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      is_hid = os_name in cluster.hidden_os
      is_blk = os_name in cluster.blacklisted_os
      if ((self._HID not in self.op.output_fields and is_hid) or
          (self._BLK not in self.op.output_fields and is_blk) or
          (self._VLD not in self.op.output_fields and not valid)):
        continue

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == self._VLD:
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = utils.NiceSort(list(variants))
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        elif field == self._HID:
          val = is_hid
        elif field == self._BLK:
          val = is_blk
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    _POutputFields,
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    _POutputFields,
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []

    # Gather data as requested
    if query.IQ_LIVE in self.requested_data:
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{"size": disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

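# Illustrative sketch (not part of the original code): the dispatch above maps
# query resource names to their implementation classes, e.g.
#
#   _GetQueryImplementation(constants.QR_NODE)      # -> _NodeQuery
#   _GetQueryImplementation(constants.QR_INSTANCE)  # -> _InstanceQuery
#
# Unknown resource names are reported as an OpPrereqError instead of leaking a
# KeyError to the caller.
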
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
    ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("filter", None, ht.TOr(ht.TNone,
                            ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)
    names = qlang.ReadSimpleFilter("name", self.op.filter)

    self.impl = qcls(names, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
    ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return self.qcls.FieldsQuery(self.op.fields)


class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
4253
    """Adds the new node to the cluster.
4254

4255
    """
4256
    new_node = self.new_node
4257
    node = new_node.name
4258

    
4259
    # We adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }
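    # NV_NODELIST asks the verifying node (here only the master) to check that
    # it can resolve and reach the new node over ssh; per-node failures are
    # reported through feedback_fn below.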

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("secondary_ip", None, ht.TMaybeString),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ("powered", None, ht.TMaybeBool),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)
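    # Example: offline=True or master_capable=False can implicitly demote the
    # node from master candidate, which is why auto_promote (lock_all below)
    # needs to lock all nodes so a replacement candidate can be chosen.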

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the node's state and the consistency of the requested changes.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are taken into
  account (and make the function return False); otherwise they are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.
5070

5071
  This function check if all given nodes have the needed amount of
5072
  free disk. In case any node has less disk or we cannot get the
5073
  information from the node, this function raise an OpPrereqError
5074
  exception.
5075

5076
  @type lu: C{LogicalUnit}
5077
  @param lu: a logical unit from which we get configuration data
5078
  @type nodenames: C{list}
5079
  @param nodenames: the list of node names to check
5080
  @type req_sizes: C{dict}
5081
  @param req_sizes: the hash of vg and corresponding amount of disk in
5082
      MiB to check for
5083
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5084
      or we cannot check the node
5085

5086
  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.iteritems():
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template == constants.DT_FILE and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
5747
    ]
5748
  REQ_BGL = False
5749

    
5750
  def CheckArguments(self):
5751
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5752
                             self.op.use_locking)
5753

    
5754
  def ExpandNames(self):
5755
    self.iq.ExpandNames(self)
5756

    
5757
  def DeclareLocks(self, level):
5758
    self.iq.DeclareLocks(self, level)
5759

    
5760
  def Exec(self, feedback_fn):
5761
    return self.iq.OldStyleQuery(self)
5762

    
5763

    
5764
class LUFailoverInstance(LogicalUnit):
5765
  """Failover an instance.
5766

5767
  """
5768
  HPATH = "instance-failover"
5769
  HTYPE = constants.HTYPE_INSTANCE
5770
  _OP_PARAMS = [
5771
    _PInstanceName,
5772
    ("ignore_consistency", False, ht.TBool),
5773
    _PShutdownTimeout,
5774
    ]
5775
  REQ_BGL = False
5776

    
5777
  def ExpandNames(self):
5778
    self._ExpandAndLockInstance()
5779
    self.needed_locks[locking.LEVEL_NODE] = []
5780
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5781

    
5782
  def DeclareLocks(self, level):
5783
    if level == locking.LEVEL_NODE:
5784
      self._LockInstancesNodes()
5785

    
5786
  def BuildHooksEnv(self):
5787
    """Build hooks env.
5788

5789
    This runs on master, primary and secondary nodes of the instance.
5790

5791
    """
5792
    instance = self.instance
5793
    source_node = instance.primary_node
5794
    target_node = instance.secondary_nodes[0]
5795
    env = {
5796
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5797
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5798
      "OLD_PRIMARY": source_node,
5799
      "OLD_SECONDARY": target_node,
5800
      "NEW_PRIMARY": target_node,
5801
      "NEW_SECONDARY": source_node,
5802
      }
5803
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5804
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5805
    nl_post = list(nl)
5806
    nl_post.append(source_node)
5807
    return env, nl, nl_post
5808

    
5809
  def CheckPrereq(self):
5810
    """Check prerequisites.
5811

5812
    This checks that the instance is in the cluster.
5813

5814
    """
5815
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5816
    assert self.instance is not None, \
5817
      "Cannot retrieve locked instance %s" % self.op.instance_name
5818

    
5819
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5820
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5821
      raise errors.OpPrereqError("Instance's disk layout is not"
5822
                                 " network mirrored, cannot failover.",
5823
                                 errors.ECODE_STATE)
5824

    
5825
    secondary_nodes = instance.secondary_nodes
5826
    if not secondary_nodes:
5827
      raise errors.ProgrammerError("no secondary node but using "
5828
                                   "a mirrored disk template")
5829

    
5830
    target_node = secondary_nodes[0]
5831
    _CheckNodeOnline(self, target_node)
5832
    _CheckNodeNotDrained(self, target_node)
5833
    if instance.admin_up:
5834
      # check memory requirements on the secondary node
5835
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5836
                           instance.name, bep[constants.BE_MEMORY],
5837
                           instance.hypervisor)
5838
    else:
5839
      self.LogInfo("Not checking memory on the secondary node as"
5840
                   " instance will not be started")
5841

    
5842
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5844

    
5845
  def Exec(self, feedback_fn):
5846
    """Failover an instance.
5847

5848
    The failover is done by shutting it down on its present node and
5849
    starting it on the secondary.
5850

5851
    """
5852
    instance = self.instance
5853
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5854

    
5855
    source_node = instance.primary_node
5856
    target_node = instance.secondary_nodes[0]
5857

    
5858
    if instance.admin_up:
5859
      feedback_fn("* checking disk consistency between source and target")
5860
      for dev in instance.disks:
5861
        # for drbd, these are drbd over lvm
5862
        if not _CheckDiskConsistency(self, dev, target_node, False):
5863
          if not self.op.ignore_consistency:
5864
            raise errors.OpExecError("Disk %s is degraded on target node,"
5865
                                     " aborting failover." % dev.iv_name)
5866
    else:
5867
      feedback_fn("* not checking disk consistency as instance is not running")
5868

    
5869
    feedback_fn("* shutting down instance on source node")
5870
    logging.info("Shutting down instance %s on node %s",
5871
                 instance.name, source_node)
5872

    
5873
    result = self.rpc.call_instance_shutdown(source_node, instance,
5874
                                             self.op.shutdown_timeout)
5875
    msg = result.fail_msg
5876
    if msg:
5877
      if self.op.ignore_consistency or primary_node.offline:
5878
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5879
                             " Proceeding anyway. Please make sure node"
5880
                             " %s is down. Error details: %s",
5881
                             instance.name, source_node, source_node, msg)
5882
      else:
5883
        raise errors.OpExecError("Could not shutdown instance %s on"
5884
                                 " node %s: %s" %
5885
                                 (instance.name, source_node, msg))
5886

    
5887
    feedback_fn("* deactivating the instance's disks on source node")
5888
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5889
      raise errors.OpExecError("Can't shut down the instance's disks.")
5890

    
5891
    instance.primary_node = target_node
5892
    # distribute new instance config to the other nodes
5893
    self.cfg.Update(instance, feedback_fn)
5894

    
5895
    # Only start the instance if it's marked as up
5896
    if instance.admin_up:
5897
      feedback_fn("* activating the instance's disks on target node")
5898
      logging.info("Starting instance %s on node %s",
5899
                   instance.name, target_node)
5900

    
5901
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5902
                                           ignore_secondaries=True)
5903
      if not disks_ok:
5904
        _ShutdownInstanceDisks(self, instance)
5905
        raise errors.OpExecError("Can't activate the instance's disks")
5906

    
5907
      feedback_fn("* starting the instance on the target node")
5908
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5909
      msg = result.fail_msg
5910
      if msg:
5911
        _ShutdownInstanceDisks(self, instance)
5912
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5913
                                 (instance.name, target_node, msg))
5914

    
5915

    
5916
class LUMigrateInstance(LogicalUnit):
5917
  """Migrate an instance.
5918

5919
  This is migration without shutting down, compared to the failover,
5920
  which is done with shutdown.
5921

5922
  """
5923
  HPATH = "instance-migrate"
5924
  HTYPE = constants.HTYPE_INSTANCE
5925
  _OP_PARAMS = [
5926
    _PInstanceName,
5927
    _PMigrationMode,
5928
    _PMigrationLive,
5929
    ("cleanup", False, ht.TBool),
5930
    ]
5931

    
5932
  REQ_BGL = False
5933

    
5934
  def ExpandNames(self):
5935
    self._ExpandAndLockInstance()
5936

    
5937
    self.needed_locks[locking.LEVEL_NODE] = []
5938
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5939

    
5940
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5941
                                       self.op.cleanup)
5942
    self.tasklets = [self._migrater]
5943

    
5944
  def DeclareLocks(self, level):
5945
    if level == locking.LEVEL_NODE:
5946
      self._LockInstancesNodes()
5947

    
5948
  def BuildHooksEnv(self):
5949
    """Build hooks env.
5950

5951
    This runs on master, primary and secondary nodes of the instance.
5952

5953
    """
5954
    instance = self._migrater.instance
5955
    source_node = instance.primary_node
5956
    target_node = instance.secondary_nodes[0]
5957
    env = _BuildInstanceHookEnvByObject(self, instance)
5958
    env["MIGRATE_LIVE"] = self._migrater.live
5959
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5960
    env.update({
5961
        "OLD_PRIMARY": source_node,
5962
        "OLD_SECONDARY": target_node,
5963
        "NEW_PRIMARY": target_node,
5964
        "NEW_SECONDARY": source_node,
5965
        })
5966
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5967
    nl_post = list(nl)
5968
    nl_post.append(source_node)
5969
    return env, nl, nl_post
5970

    
5971

    
5972
class LUMoveInstance(LogicalUnit):
5973
  """Move an instance by data-copying.
5974

5975
  """
5976
  HPATH = "instance-move"
5977
  HTYPE = constants.HTYPE_INSTANCE
5978
  _OP_PARAMS = [
5979
    _PInstanceName,
5980
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5981
    _PShutdownTimeout,
5982
    ]
5983
  REQ_BGL = False
5984

    
5985
  def ExpandNames(self):
5986
    self._ExpandAndLockInstance()
5987
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5988
    self.op.target_node = target_node
5989
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5990
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5991

    
5992
  def DeclareLocks(self, level):
5993
    if level == locking.LEVEL_NODE:
5994
      self._LockInstancesNodes(primary_only=True)
5995

    
5996
  def BuildHooksEnv(self):
5997
    """Build hooks env.
5998

5999
    This runs on master, primary and secondary nodes of the instance.
6000

6001
    """
6002
    env = {
6003
      "TARGET_NODE": self.op.target_node,
6004
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6005
      }
6006
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6007
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6008
                                       self.op.target_node]
6009
    return env, nl, nl
6010

    
6011
  def CheckPrereq(self):
6012
    """Check prerequisites.
6013

6014
    This checks that the instance is in the cluster.
6015

6016
    """
6017
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6018
    assert self.instance is not None, \
6019
      "Cannot retrieve locked instance %s" % self.op.instance_name
6020

    
6021
    node = self.cfg.GetNodeInfo(self.op.target_node)
6022
    assert node is not None, \
6023
      "Cannot retrieve locked node %s" % self.op.target_node
6024

    
6025
    self.target_node = target_node = node.name
6026

    
6027
    if target_node == instance.primary_node:
6028
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6029
                                 (instance.name, target_node),
6030
                                 errors.ECODE_STATE)
6031

    
6032
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6033

    
6034
    for idx, dsk in enumerate(instance.disks):
6035
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6036
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6037
                                   " cannot copy" % idx, errors.ECODE_STATE)
6038

    
6039
    _CheckNodeOnline(self, target_node)
6040
    _CheckNodeNotDrained(self, target_node)
6041
    _CheckNodeVmCapable(self, target_node)
6042

    
6043
    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")
6051

    
6052
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6054

    
6055
  def Exec(self, feedback_fn):
6056
    """Move an instance.
6057

6058
    The move is done by shutting it down on its present node, copying
6059
    the data over (slow) and starting it on the new node.
6060

6061
    """
6062
    instance = self.instance
6063

    
6064
    source_node = instance.primary_node
6065
    target_node = self.target_node
6066

    
6067
    self.LogInfo("Shutting down instance %s on source node %s",
6068
                 instance.name, source_node)
6069

    
6070
    result = self.rpc.call_instance_shutdown(source_node, instance,
6071
                                             self.op.shutdown_timeout)
6072
    msg = result.fail_msg
6073
    if msg:
6074
      if self.op.ignore_consistency:
6075
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6076
                             " Proceeding anyway. Please make sure node"
6077
                             " %s is down. Error details: %s",
6078
                             instance.name, source_node, source_node, msg)
6079
      else:
6080
        raise errors.OpExecError("Could not shutdown instance %s on"
6081
                                 " node %s: %s" %
6082
                                 (instance.name, source_node, msg))
6083

    
6084
    # create the target disks
6085
    try:
6086
      _CreateDisks(self, instance, target_node=target_node)
6087
    except errors.OpExecError:
6088
      self.LogWarning("Device creation failed, reverting...")
6089
      try:
6090
        _RemoveDisks(self, instance, target_node=target_node)
6091
      finally:
6092
        self.cfg.ReleaseDRBDMinors(instance.name)
6093
        raise
6094

    
6095
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6096

    
6097
    errs = []
6098
    # activate, get path, copy the data over
6099
    for idx, disk in enumerate(instance.disks):
6100
      self.LogInfo("Copying data for disk %d", idx)
6101
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6102
                                               instance.name, True)
6103
      if result.fail_msg:
6104
        self.LogWarning("Can't assemble newly created disk %d: %s",
6105
                        idx, result.fail_msg)
6106
        errs.append(result.fail_msg)
6107
        break
6108
      dev_path = result.payload
6109
      result = self.rpc.call_blockdev_export(source_node, disk,
6110
                                             target_node, dev_path,
6111
                                             cluster_name)
6112
      if result.fail_msg:
6113
        self.LogWarning("Can't copy data over for disk %d: %s",
6114
                        idx, result.fail_msg)
6115
        errs.append(result.fail_msg)
6116
        break
6117

    
6118
    if errs:
6119
      self.LogWarning("Some disks failed to copy, aborting")
6120
      try:
6121
        _RemoveDisks(self, instance, target_node=target_node)
6122
      finally:
6123
        self.cfg.ReleaseDRBDMinors(instance.name)
6124
        raise errors.OpExecError("Errors during disk copy: %s" %
6125
                                 (",".join(errs),))
6126

    
6127
    instance.primary_node = target_node
6128
    self.cfg.Update(instance, feedback_fn)
6129

    
6130
    self.LogInfo("Removing the disks on the original node")
6131
    _RemoveDisks(self, instance, target_node=source_node)
6132

    
6133
    # Only start the instance if it's marked as up
6134
    if instance.admin_up:
6135
      self.LogInfo("Starting instance %s on node %s",
6136
                   instance.name, target_node)
6137

    
6138
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6139
                                           ignore_secondaries=True)
6140
      if not disks_ok:
6141
        _ShutdownInstanceDisks(self, instance)
6142
        raise errors.OpExecError("Can't activate the instance's disks")
6143

    
6144
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6145
      msg = result.fail_msg
6146
      if msg:
6147
        _ShutdownInstanceDisks(self, instance)
6148
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6149
                                 (instance.name, target_node, msg))
6150

    
6151

    
6152
class LUMigrateNode(LogicalUnit):
6153
  """Migrate all instances from a node.
6154

6155
  """
6156
  HPATH = "node-migrate"
6157
  HTYPE = constants.HTYPE_NODE
6158
  _OP_PARAMS = [
6159
    _PNodeName,
6160
    _PMigrationMode,
6161
    _PMigrationLive,
6162
    ]
6163
  REQ_BGL = False
6164

    
6165
  def ExpandNames(self):
6166
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6167

    
6168
    self.needed_locks = {
6169
      locking.LEVEL_NODE: [self.op.node_name],
6170
      }
6171

    
6172
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6173

    
6174
    # Create tasklets for migrating instances for all instances on this node
6175
    names = []
6176
    tasklets = []
6177

    
6178
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6179
      logging.debug("Migrating instance %s", inst.name)
6180
      names.append(inst.name)
6181

    
6182
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6183

    
6184
    self.tasklets = tasklets
6185

    
6186
    # Declare instance locks
6187
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6188

    
6189
  def DeclareLocks(self, level):
6190
    if level == locking.LEVEL_NODE:
6191
      self._LockInstancesNodes()
6192

    
6193
  def BuildHooksEnv(self):
6194
    """Build hooks env.
6195

6196
    This runs on the master, the primary and all the secondaries.
6197

6198
    """
6199
    env = {
6200
      "NODE_NAME": self.op.node_name,
6201
      }
6202

    
6203
    nl = [self.cfg.GetMasterNode()]
6204

    
6205
    return (env, nl, nl)
6206

    
6207

    
6208
class TLMigrateInstance(Tasklet):
6209
  """Tasklet class for instance migration.
6210

6211
  @type live: boolean
6212
  @ivar live: whether the migration will be done live or non-live;
6213
      this variable is initalized only after CheckPrereq has run
6214

6215
  """
6216
  def __init__(self, lu, instance_name, cleanup):
6217
    """Initializes this class.
6218

6219
    """
6220
    Tasklet.__init__(self, lu)
6221

    
6222
    # Parameters
6223
    self.instance_name = instance_name
6224
    self.cleanup = cleanup
6225
    self.live = False # will be overridden later
6226

    
6227
  def CheckPrereq(self):
6228
    """Check prerequisites.
6229

6230
    This checks that the instance is in the cluster.
6231

6232
    """
6233
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6234
    instance = self.cfg.GetInstanceInfo(instance_name)
6235
    assert instance is not None
6236

    
6237
    if instance.disk_template != constants.DT_DRBD8:
6238
      raise errors.OpPrereqError("Instance's disk layout is not"
6239
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6240

    
6241
    secondary_nodes = instance.secondary_nodes
6242
    if not secondary_nodes:
6243
      raise errors.ConfigurationError("No secondary node but using"
6244
                                      " drbd8 disk template")
6245

    
6246
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6247

    
6248
    target_node = secondary_nodes[0]
6249
    # check memory requirements on the secondary node
6250
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6251
                         instance.name, i_be[constants.BE_MEMORY],
6252
                         instance.hypervisor)
6253

    
6254
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6256

    
6257
    if not self.cleanup:
6258
      _CheckNodeNotDrained(self.lu, target_node)
6259
      result = self.rpc.call_instance_migratable(instance.primary_node,
6260
                                                 instance)
6261
      result.Raise("Can't migrate, please use failover",
6262
                   prereq=True, ecode=errors.ECODE_STATE)
6263

    
6264
    self.instance = instance
6265

    
6266
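    # Resolve the migration mode: the legacy boolean 'live' parameter and the
    # newer 'mode' parameter are mutually exclusive; if neither is given, the
    # hypervisor's default migration mode is used.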
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6267
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6268
                                 " parameters are accepted",
6269
                                 errors.ECODE_INVAL)
6270
    if self.lu.op.live is not None:
6271
      if self.lu.op.live:
6272
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6273
      else:
6274
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6275
      # reset the 'live' parameter to None so that repeated
6276
      # invocations of CheckPrereq do not raise an exception
6277
      self.lu.op.live = None
6278
    elif self.lu.op.mode is None:
6279
      # read the default value from the hypervisor
6280
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6281
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6282

    
6283
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6284

    
6285
  def _WaitUntilSync(self):
6286
    """Poll with custom rpc for disk sync.
6287

6288
    This uses our own step-based rpc call.
6289

6290
    """
6291
    self.feedback_fn("* wait until resync is done")
6292
    all_done = False
6293
    while not all_done:
6294
      all_done = True
6295
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6296
                                            self.nodes_ip,
6297
                                            self.instance.disks)
6298
      min_percent = 100
6299
      for node, nres in result.items():
6300
        nres.Raise("Cannot resync disks on node %s" % node)
6301
        node_done, node_percent = nres.payload
6302
        all_done = all_done and node_done
6303
        if node_percent is not None:
6304
          min_percent = min(min_percent, node_percent)
6305
      if not all_done:
6306
        if min_percent < 100:
6307
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6308
        time.sleep(2)
6309

    
6310
  def _EnsureSecondary(self, node):
6311
    """Demote a node to secondary.
6312

6313
    """
6314
    self.feedback_fn("* switching node %s to secondary mode" % node)
6315

    
6316
    for dev in self.instance.disks:
6317
      self.cfg.SetDiskID(dev, node)
6318

    
6319
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6320
                                          self.instance.disks)
6321
    result.Raise("Cannot change disk to secondary on node %s" % node)
6322

    
6323
  def _GoStandalone(self):
6324
    """Disconnect from the network.
6325

6326
    """
6327
    self.feedback_fn("* changing into standalone mode")
6328
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6329
                                               self.instance.disks)
6330
    for node, nres in result.items():
6331
      nres.Raise("Cannot disconnect disks node %s" % node)
6332

    
6333
  def _GoReconnect(self, multimaster):
6334
    """Reconnect to the network.
6335

6336
    """
6337
    if multimaster:
6338
      msg = "dual-master"
6339
    else:
6340
      msg = "single-master"
6341
    self.feedback_fn("* changing disks into %s mode" % msg)
6342
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6343
                                           self.instance.disks,
6344
                                           self.instance.name, multimaster)
6345
    for node, nres in result.items():
6346
      nres.Raise("Cannot change disks config on node %s" % node)
6347

    
6348
  def _ExecCleanup(self):
6349
    """Try to cleanup after a failed migration.
6350

6351
    The cleanup is done by:
6352
      - check that the instance is running only on one node
6353
        (and update the config if needed)
6354
      - change disks on its secondary node to secondary
6355
      - wait until disks are fully synchronized
6356
      - disconnect from the network
6357
      - change disks into single-master mode
6358
      - wait again until disks are fully synchronized
6359

6360
    """
6361
    instance = self.instance
6362
    target_node = self.target_node
6363
    source_node = self.source_node
6364

    
6365
    # check running on only one node
6366
    self.feedback_fn("* checking where the instance actually runs"
6367
                     " (if this hangs, the hypervisor might be in"
6368
                     " a bad state)")
6369
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6370
    for node, result in ins_l.items():
6371
      result.Raise("Can't contact node %s" % node)
6372

    
6373
    runningon_source = instance.name in ins_l[source_node].payload
6374
    runningon_target = instance.name in ins_l[target_node].payload
6375

    
6376
    if runningon_source and runningon_target:
6377
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6378
                               " or the hypervisor is confused. You will have"
6379
                               " to ensure manually that it runs only on one"
6380
                               " and restart this operation.")
6381

    
6382
    if not (runningon_source or runningon_target):
6383
      raise errors.OpExecError("Instance does not seem to be running at all."
6384
                               " In this case, it's safer to repair by"
6385
                               " running 'gnt-instance stop' to ensure disk"
6386
                               " shutdown, and then restarting it.")
6387

    
6388
    if runningon_target:
6389
      # the migration has actually succeeded, we need to update the config
6390
      self.feedback_fn("* instance running on secondary node (%s),"
6391
                       " updating config" % target_node)
6392
      instance.primary_node = target_node
6393
      self.cfg.Update(instance, self.feedback_fn)
6394
      demoted_node = source_node
6395
    else:
6396
      self.feedback_fn("* instance confirmed to be running on its"
6397
                       " primary node (%s)" % source_node)
6398
      demoted_node = target_node
6399

    
6400
    self._EnsureSecondary(demoted_node)
6401
    try:
6402
      self._WaitUntilSync()
6403
    except errors.OpExecError:
6404
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
6407
    self._GoStandalone()
6408
    self._GoReconnect(False)
6409
    self._WaitUntilSync()
6410

    
6411
    self.feedback_fn("* done")
6412

    
6413
  def _RevertDiskStatus(self):
6414
    """Try to revert the disk status after a failed migration.
6415

6416
    """
6417
    target_node = self.target_node
6418
    try:
6419
      self._EnsureSecondary(target_node)
6420
      self._GoStandalone()
6421
      self._GoReconnect(False)
6422
      self._WaitUntilSync()
6423
    except errors.OpExecError, err:
6424
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6425
                         " drives: error '%s'\n"
6426
                         "Please look and recover the instance status" %
6427
                         str(err))
6428

    
6429
  def _AbortMigration(self):
6430
    """Call the hypervisor code to abort a started migration.
6431

6432
    """
6433
    instance = self.instance
6434
    target_node = self.target_node
6435
    migration_info = self.migration_info
6436

    
6437
    abort_result = self.rpc.call_finalize_migration(target_node,
6438
                                                    instance,
6439
                                                    migration_info,
6440
                                                    False)
6441
    abort_msg = abort_result.fail_msg
6442
    if abort_msg:
6443
      logging.error("Aborting migration failed on target node %s: %s",
6444
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
6447

    
6448
  def _ExecMigration(self):
6449
    """Migrate an instance.
6450

6451
    The migrate is done by:
6452
      - change the disks into dual-master mode
6453
      - wait until disks are fully synchronized again
6454
      - migrate the instance
6455
      - change disks on the new secondary node (the old primary) to secondary
6456
      - wait until disks are fully synchronized
6457
      - change disks into single-master mode
6458

6459
    """
6460
    instance = self.instance
6461
    target_node = self.target_node
6462
    source_node = self.source_node
6463

    
6464
    self.feedback_fn("* checking disk consistency between source and target")
6465
    for dev in instance.disks:
6466
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6467
        raise errors.OpExecError("Disk %s is degraded or not fully"
6468
                                 " synchronized on target node,"
6469
                                 " aborting migrate." % dev.iv_name)
6470

    
6471
    # First get the migration information from the remote node
6472
    result = self.rpc.call_migration_info(source_node, instance)
6473
    msg = result.fail_msg
6474
    if msg:
6475
      log_err = ("Failed fetching source migration information from %s: %s" %
6476
                 (source_node, msg))
6477
      logging.error(log_err)
6478
      raise errors.OpExecError(log_err)
6479

    
6480
    self.migration_info = migration_info = result.payload
6481

    
6482
    # Then switch the disks to master/master mode
6483
    self._EnsureSecondary(target_node)
6484
    self._GoStandalone()
6485
    self._GoReconnect(True)
6486
    self._WaitUntilSync()
6487

    
6488
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6489
    result = self.rpc.call_accept_instance(target_node,
6490
                                           instance,
6491
                                           migration_info,
6492
                                           self.nodes_ip[target_node])
6493

    
6494
    msg = result.fail_msg
6495
    if msg:
6496
      logging.error("Instance pre-migration failed, trying to revert"
6497
                    " disk status: %s", msg)
6498
      self.feedback_fn("Pre-migration failed, aborting")
6499
      self._AbortMigration()
6500
      self._RevertDiskStatus()
6501
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6502
                               (instance.name, msg))
6503

    
6504
    self.feedback_fn("* migrating instance to %s" % target_node)
6505
    time.sleep(10)
6506
    result = self.rpc.call_instance_migrate(source_node, instance,
6507
                                            self.nodes_ip[target_node],
6508
                                            self.live)
6509
    msg = result.fail_msg
6510
    if msg:
6511
      logging.error("Instance migration failed, trying to revert"
6512
                    " disk status: %s", msg)
6513
      self.feedback_fn("Migration failed, aborting")
6514
      self._AbortMigration()
6515
      self._RevertDiskStatus()
6516
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6517
                               (instance.name, msg))
6518
    time.sleep(10)
6519

    
6520
    instance.primary_node = target_node
6521
    # distribute new instance config to the other nodes
6522
    self.cfg.Update(instance, self.feedback_fn)
6523

    
6524
    result = self.rpc.call_finalize_migration(target_node,
6525
                                              instance,
6526
                                              migration_info,
6527
                                              True)
6528
    msg = result.fail_msg
6529
    if msg:
6530
      logging.error("Instance migration succeeded, but finalization failed:"
6531
                    " %s", msg)
6532
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6533
                               msg)
6534

    
6535
    self._EnsureSecondary(source_node)
6536
    self._WaitUntilSync()
6537
    self._GoStandalone()
6538
    self._GoReconnect(False)
6539
    self._WaitUntilSync()
6540

    
6541
    self.feedback_fn("* done")
6542

    
6543
  def Exec(self, feedback_fn):
6544
    """Perform the migration.
6545

6546
    """
6547
    feedback_fn("Migrating instance %s" % self.instance.name)
6548

    
6549
    self.feedback_fn = feedback_fn
6550

    
6551
    self.source_node = self.instance.primary_node
6552
    self.target_node = self.instance.secondary_nodes[0]
6553
    self.all_nodes = [self.source_node, self.target_node]
6554
    self.nodes_ip = {
6555
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6556
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6557
      }
6558

    
6559
    if self.cleanup:
6560
      return self._ExecCleanup()
6561
    else:
6562
      return self._ExecMigration()
6563

    
6564

    
6565
def _CreateBlockDev(lu, node, instance, device, force_create,
6566
                    info, force_open):
6567
  """Create a tree of block devices on a given node.
6568

6569
  If this device type has to be created on secondaries, create it and
6570
  all its children.
6571

6572
  If not, just recurse to children keeping the same 'force' value.
6573

6574
  @param lu: the lu on whose behalf we execute
6575
  @param node: the node on which to create the device
6576
  @type instance: L{objects.Instance}
6577
  @param instance: the instance which owns the device
6578
  @type device: L{objects.Disk}
6579
  @param device: the device to create
6580
  @type force_create: boolean
6581
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6591

6592
  """
6593
  if device.CreateOnSecondary():
6594
    force_create = True
6595

    
6596
  if device.children:
6597
    for child in device.children:
6598
      _CreateBlockDev(lu, node, instance, child, force_create,
6599
                      info, force_open)
6600

    
6601
  if not force_create:
6602
    return
6603

    
6604
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6605

    
6606

    
6607
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6608
  """Create a single block device on a given node.
6609

6610
  This will not recurse over children of the device, so they must be
6611
  created in advance.
6612

6613
  @param lu: the lu on whose behalf we execute
6614
  @param node: the node on which to create the device
6615
  @type instance: L{objects.Instance}
6616
  @param instance: the instance which owns the device
6617
  @type device: L{objects.Disk}
6618
  @param device: the device to create
6619
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6626

6627
  """
6628
  lu.cfg.SetDiskID(device, node)
6629
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6630
                                       instance.name, force_open, info)
6631
  result.Raise("Can't create block device %s on"
6632
               " node %s for instance %s" % (device, node, instance.name))
6633
  if device.physical_id is None:
6634
    device.physical_id = result.payload
6635

    
6636

    
6637
def _GenerateUniqueNames(lu, exts):
6638
  """Generate a suitable LV name.
6639

6640
  This will generate a logical volume name for the given instance.
6641

6642
  """
6643
  results = []
6644
  for val in exts:
6645
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6646
    results.append("%s%s" % (new_id, val))
6647
  return results
6648

    
6649

    
6650
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6651
                         p_minor, s_minor):
6652
  """Generate a drbd8 device complete with its children.
6653

6654
  """
6655
  port = lu.cfg.AllocatePort()
6656
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6657
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6658
                          logical_id=(vgname, names[0]))
6659
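  # the small 128 MB volume below holds the DRBD metadata for this disk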
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6660
                          logical_id=(vgname, names[1]))
6661
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6662
                          logical_id=(primary, secondary, port,
6663
                                      p_minor, s_minor,
6664
                                      shared_secret),
6665
                          children=[dev_data, dev_meta],
6666
                          iv_name=iv_name)
6667
  return drbd_dev
6668

    
6669

    
6670
def _GenerateDiskTemplate(lu, template_name,
6671
                          instance_name, primary_node,
6672
                          secondary_nodes, disk_info,
6673
                          file_storage_dir, file_driver,
6674
                          base_index, feedback_fn):
6675
  """Generate the entire disk layout for a given template type.
6676

6677
  """
6678
  #TODO: compute space requirements
6679

    
6680
  vgname = lu.cfg.GetVGName()
6681
  disk_count = len(disk_info)
6682
  disks = []
6683
  if template_name == constants.DT_DISKLESS:
6684
    pass
6685
  elif template_name == constants.DT_PLAIN:
6686
    if len(secondary_nodes) != 0:
6687
      raise errors.ProgrammerError("Wrong template configuration")
6688

    
6689
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6690
                                      for i in range(disk_count)])
6691
    for idx, disk in enumerate(disk_info):
6692
      disk_index = idx + base_index
6693
      vg = disk.get("vg", vgname)
6694
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6695
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6696
                              logical_id=(vg, names[idx]),
6697
                              iv_name="disk/%d" % disk_index,
6698
                              mode=disk["mode"])
6699
      disks.append(disk_dev)
6700
  elif template_name == constants.DT_DRBD8:
6701
    if len(secondary_nodes) != 1:
6702
      raise errors.ProgrammerError("Wrong template configuration")
6703
    remote_node = secondary_nodes[0]
6704
    minors = lu.cfg.AllocateDRBDMinor(
6705
      [primary_node, remote_node] * len(disk_info), instance_name)
6706

    
6707
    names = []
6708
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6709
                                               for i in range(disk_count)]):
6710
      names.append(lv_prefix + "_data")
6711
      names.append(lv_prefix + "_meta")
6712
    for idx, disk in enumerate(disk_info):
6713
      disk_index = idx + base_index
6714
      vg = disk.get("vg", vgname)
6715
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6716
                                      disk["size"], vg, names[idx*2:idx*2+2],
6717
                                      "disk/%d" % disk_index,
6718
                                      minors[idx*2], minors[idx*2+1])
6719
      disk_dev.mode = disk["mode"]
6720
      disks.append(disk_dev)
6721
  elif template_name == constants.DT_FILE:
6722
    if len(secondary_nodes) != 0:
6723
      raise errors.ProgrammerError("Wrong template configuration")
6724

    
6725
    _RequireFileStorage()
6726

    
6727
    for idx, disk in enumerate(disk_info):
6728
      disk_index = idx + base_index
6729
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6730
                              iv_name="disk/%d" % disk_index,
6731
                              logical_id=(file_driver,
6732
                                          "%s/disk%d" % (file_storage_dir,
6733
                                                         disk_index)),
6734
                              mode=disk["mode"])
6735
      disks.append(disk_dev)
6736
  else:
6737
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
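
# Illustrative sketch, not used by any LU: a worked example of the ETA
# formula implemented by _CalcEta above, with made-up numbers.
def _ExampleCalcEta():
  # if 512 MiB out of 2048 MiB were written in 60 seconds, the remaining
  # 1536 MiB should take about 180 more seconds at the same average speed
  return _CalcEta(60.0, 512, 2048)  # == 180.0
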
def _WipeDisks(lu, instance):
6762
  """Wipes instance disks.
6763

6764
  @type lu: L{LogicalUnit}
6765
  @param lu: the logical unit on whose behalf we execute
6766
  @type instance: L{objects.Instance}
6767
  @param instance: the instance whose disks we should create
6768
  @return: the success of the wipe
6769

6770
  """
6771
  node = instance.primary_node
6772
  for idx, device in enumerate(instance.disks):
6773
    lu.LogInfo("* Wiping disk %d", idx)
6774
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6775

    
6776
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6777
    # MAX_WIPE_CHUNK at max
6778
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6779
                          constants.MIN_WIPE_CHUNK_PERCENT)
6780

    
6781
    offset = 0
6782
    size = device.size
6783
    last_output = 0
6784
    start_time = time.time()
6785

    
6786
    while offset < size:
6787
      wipe_size = min(wipe_chunk_size, size - offset)
6788
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6789
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6790
                   (idx, offset, wipe_size))
6791
      now = time.time()
6792
      offset += wipe_size
6793
      if now - last_output >= 60:
6794
        eta = _CalcEta(now - start_time, offset, size)
6795
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6796
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6797
        last_output = now
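

# Illustrative sketch, not used by any LU: the chunk-size formula applied by
# _WipeDisks above, reusing the module's MAX_WIPE_CHUNK and
# MIN_WIPE_CHUNK_PERCENT constants.
def _ExampleWipeChunkSize(disk_size):
  # a fixed percentage of the disk, but never more than MAX_WIPE_CHUNK;
  # disk_size is in MiB
  return min(constants.MAX_WIPE_CHUNK,
             disk_size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT)
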
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6801
  """Create all disks for an instance.
6802

6803
  This abstracts away some work from AddInstance.
6804

6805
  @type lu: L{LogicalUnit}
6806
  @param lu: the logical unit on whose behalf we execute
6807
  @type instance: L{objects.Instance}
6808
  @param instance: the instance whose disks we should create
6809
  @type to_skip: list
6810
  @param to_skip: list of indices to skip
6811
  @type target_node: string
6812
  @param target_node: if passed, overrides the target node for creation
6813
  @rtype: boolean
6814
  @return: the success of the creation
6815

6816
  """
6817
  info = _GetInstanceInfoText(instance)
6818
  if target_node is None:
6819
    pnode = instance.primary_node
6820
    all_nodes = instance.all_nodes
6821
  else:
6822
    pnode = target_node
6823
    all_nodes = [pnode]
6824

    
6825
  if instance.disk_template == constants.DT_FILE:
6826
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6827
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6828

    
6829
    result.Raise("Failed to create directory '%s' on"
6830
                 " node %s" % (file_storage_dir, pnode))
6831

    
6832
  # Note: this needs to be kept in sync with adding of disks in
6833
  # LUSetInstanceParams
6834
  for idx, device in enumerate(instance.disks):
6835
    if to_skip and idx in to_skip:
6836
      continue
6837
    logging.info("Creating volume %s for instance %s",
6838
                 device.iv_name, instance.name)
6839
    #HARDCODE
6840
    for node in all_nodes:
6841
      f_create = node == pnode
6842
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6843

    
6844

    
6845
def _RemoveDisks(lu, instance, target_node=None):
6846
  """Remove all disks for an instance.
6847

6848
  This abstracts away some work from `AddInstance()` and
6849
  `RemoveInstance()`. Note that in case some of the devices couldn't
6850
  be removed, the removal will continue with the other ones (compare
6851
  with `_CreateDisks()`).
6852

6853
  @type lu: L{LogicalUnit}
6854
  @param lu: the logical unit on whose behalf we execute
6855
  @type instance: L{objects.Instance}
6856
  @param instance: the instance whose disks we should remove
6857
  @type target_node: string
6858
  @param target_node: used to override the node on which to remove the disks
6859
  @rtype: boolean
6860
  @return: the success of the removal
6861

6862
  """
6863
  logging.info("Removing block devices for instance %s", instance.name)
6864

    
6865
  all_result = True
6866
  for device in instance.disks:
6867
    if target_node:
6868
      edata = [(target_node, device)]
6869
    else:
6870
      edata = device.ComputeNodeTree(instance.primary_node)
6871
    for node, disk in edata:
6872
      lu.cfg.SetDiskID(disk, node)
6873
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6874
      if msg:
6875
        lu.LogWarning("Could not remove block device %s on node %s,"
6876
                      " continuing anyway: %s", device.iv_name, node, msg)
6877
        all_result = False
6878

    
6879
  if instance.disk_template == constants.DT_FILE:
6880
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6881
    if target_node:
6882
      tgt = target_node
6883
    else:
6884
      tgt = instance.primary_node
6885
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6886
    if result.fail_msg:
6887
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6888
                    file_storage_dir, instance.primary_node, result.fail_msg)
6889
      all_result = False
6890

    
6891
  return all_result
6892

    
6893

    
6894
def _ComputeDiskSizePerVG(disk_template, disks):
6895
  """Compute disk size requirements in the volume group
6896

6897
  """
6898
  def _compute(disks, payload):
6899
    """Universal algorithm
6900

6901
    """
6902
    vgs = {}
6903
    for disk in disks:
      # accumulate the required space per volume group, keyed by the disk's VG
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload
6905

    
6906
    return vgs
6907

    
6908
  # Required free disk space as a function of disk and swap space
6909
  req_size_dict = {
6910
    constants.DT_DISKLESS: None,
6911
    constants.DT_PLAIN: _compute(disks, 0),
6912
    # 128 MB are added for drbd metadata for each disk
6913
    constants.DT_DRBD8: _compute(disks, 128),
6914
    constants.DT_FILE: None,
6915
  }
6916

    
6917
  if disk_template not in req_size_dict:
6918
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6919
                                 " is unknown" %  disk_template)
6920

    
6921
  return req_size_dict[disk_template]
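

# Illustrative sketch, not used by any LU: what _ComputeDiskSizePerVG above
# returns for a hypothetical two-disk DRBD instance; "xenvg" and "fastvg"
# are made-up volume group names and sizes are in MiB.
def _ExampleDiskSizePerVG():
  disks = [{"vg": "xenvg", "size": 1024},
           {"vg": "fastvg", "size": 512}]
  # each DRBD disk gets 128 MB of metadata on top, so this returns
  # {"xenvg": 1152, "fastvg": 640}
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
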
def _ComputeDiskSize(disk_template, disks):
6925
  """Compute disk size requirements in the volume group
6926

6927
  """
6928
  # Required free disk space as a function of disk and swap space
6929
  req_size_dict = {
6930
    constants.DT_DISKLESS: None,
6931
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6932
    # 128 MB are added for drbd metadata for each disk
6933
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6934
    constants.DT_FILE: None,
6935
  }
6936

    
6937
  if disk_template not in req_size_dict:
6938
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6939
                                 " is unknown" %  disk_template)
6940

    
6941
  return req_size_dict[disk_template]
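

# Illustrative sketch, not used by any LU: the flat space requirement computed
# by _ComputeDiskSize above for the same hypothetical disk set (sizes in MiB).
def _ExampleComputeDiskSize():
  disks = [{"size": 1024}, {"size": 512}]
  plain = _ComputeDiskSize(constants.DT_PLAIN, disks)  # 1536
  drbd8 = _ComputeDiskSize(constants.DT_DRBD8, disks)  # 1792, i.e. +128 MB per disk
  return (plain, drbd8)
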
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6945
  """Hypervisor parameter validation.
6946

6947
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
6949

6950
  @type lu: L{LogicalUnit}
6951
  @param lu: the logical unit for which we check
6952
  @type nodenames: list
6953
  @param nodenames: the list of nodes on which we should check
6954
  @type hvname: string
6955
  @param hvname: the name of the hypervisor we should use
6956
  @type hvparams: dict
6957
  @param hvparams: the parameters which we need to check
6958
  @raise errors.OpPrereqError: if the parameters are not valid
6959

6960
  """
6961
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6962
                                                  hvname,
6963
                                                  hvparams)
6964
  for node in nodenames:
6965
    info = hvinfo[node]
6966
    if info.offline:
6967
      continue
6968
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6969

    
6970

    
6971
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6972
  """OS parameters validation.
6973

6974
  @type lu: L{LogicalUnit}
6975
  @param lu: the logical unit for which we check
6976
  @type required: boolean
6977
  @param required: whether the validation should fail if the OS is not
6978
      found
6979
  @type nodenames: list
6980
  @param nodenames: the list of nodes on which we should check
6981
  @type osname: string
  @param osname: the name of the OS we should use
6983
  @type osparams: dict
6984
  @param osparams: the parameters which we need to check
6985
  @raise errors.OpPrereqError: if the parameters are not valid
6986

6987
  """
6988
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6989
                                   [constants.OS_VALIDATE_PARAMETERS],
6990
                                   osparams)
6991
  for node, nres in result.items():
6992
    # we don't check for offline cases since this should be run only
6993
    # against the master node and/or an instance's nodes
6994
    nres.Raise("OS Parameters validation failed on node %s" % node)
6995
    if not nres.payload:
6996
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6997
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, ht.TBool),
    ("wait_for_sync", True, ht.TBool),
    ("ip_check", True, ht.TBool),
    ("name_check", True, ht.TBool),
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("osparams", ht.EmptyDict, ht.TDict),
    ("no_install", None, ht.TMaybeBool),
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
    ("source_x509_ca", None, ht.TMaybeString),
    ("source_instance_name", None, ht.TMaybeString),
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
     ht.TPositiveInt),
    ("src_node", None, ht.TMaybeString),
    ("src_path", None, ht.TMaybeString),
    ("pnode", None, ht.TMaybeString),
    ("snode", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("hypervisor", None, ht.TMaybeString),
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, ht.TBool),
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute", errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
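
  # Illustrative sketch (editor's comment): for a DRBD creation with explicit
  # nodes and no iallocator, the locks computed above end up roughly as
  # (instance/node names hypothetical):
  #
  #   self.add_locks[locking.LEVEL_INSTANCE] == "inst1.example.com"
  #   self.needed_locks[locking.LEVEL_NODE] == ["node1.example.com",
  #                                             "node2.example.com"]
  #
  # while an iallocator-driven creation locks locking.ALL_SET at node level.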

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
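
  # Illustrative sketch (editor's comment): on success ial.result is an
  # ordered node list, e.g. ["node1", "node2"] (hypothetical names) for a
  # two-node template, which is why result[0] becomes the primary node above
  # and, when required_nodes == 2, result[1] becomes the secondary.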

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl
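
  # Illustrative sketch (editor's comment): the returned environment is the
  # instance-level dictionary produced by _BuildInstanceHookEnv plus the
  # ADD_MODE key set above and, for imports, SRC_NODE/SRC_PATH/SRC_IMAGES;
  # e.g. a plain creation yields env["ADD_MODE"] == constants.INSTANCE_CREATE,
  # and the hook node list covers the master plus all instance nodes.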

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
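
  # Illustrative sketch (editor's comment, section name assumed): the export
  # data loaded above is an INI-style file; this method only requires the
  # constants.INISECT_EXP section carrying a matching "version" entry,
  # roughly
  #
  #   [export]
  #   version = 0
  #
  # while the per-instance settings are consumed later by _ReadExportParams.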

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
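
  # Illustrative sketch (editor's comment): with identify_defaults set, a
  # request passing e.g. beparams={"memory": <current cluster default>} is
  # stripped back to beparams={}, so the new instance keeps tracking the
  # cluster default instead of pinning today's value; parameters that differ
  # from the defaults are kept verbatim.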

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      vg = disk.get("vg", self.cfg.GetVGName())
      new_disk = {"size": size, "mode": mode, "vg": vg}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    else: # instead, we must check the adoption data
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names[pnode.name].payload.keys()
                                      )[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
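
  # Illustrative sketch (editor's comment): after these checks a non-adopting
  # request such as disks=[{"size": 10240}] (size hypothetical) ends up in
  # self.disks as [{"size": 10240, "mode": constants.DISK_RDWR, "vg": <VG>}],
  # while an adopting request also carries the "adopt" key with the existing
  # LV name and has its size overwritten with the size of the LV actually
  # found on the primary node.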

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
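
  # Illustrative sketch (editor's comment): in the adoption branch of Exec,
  # rename_to collects the freshly generated logical_ids while each tmp_disk
  # is pointed back at the pre-existing LV, so zip(tmp_disks, rename_to)
  # describes old-name -> new-name pairs for call_blockdev_rename; e.g. a
  # disk adopted as "xenvg/my-old-lv" (hypothetical) is renamed to whatever
  # LV name _GenerateDiskTemplate picked for disk 0.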


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
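
  # Illustrative sketch (editor's comment): the LU does not open the console
  # itself; a client is expected to exec the argument vector returned above,
  # conceptually (opcode/client helper names hypothetical):
  #
  #   argv = SubmitOpCode(opcodes.OpConnectConsole(instance_name="inst1"))
  #   os.execvp(argv[0], argv)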


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl
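
  # Illustrative sketch (editor's comment): the argument combinations accepted
  # by TLReplaceDisks.CheckArguments (used by CheckArguments above), shown as
  # (mode, remote_node, iallocator) with hypothetical node/allocator names:
  #
  #   (REPLACE_DISK_PRI/SEC/AUTO, None,    None)    -> allowed
  #   (REPLACE_DISK_CHG,          "node3", None)    -> allowed
  #   (REPLACE_DISK_CHG,          None,    "hail")  -> allowed
  #   (REPLACE_DISK_CHG,          None,    None)    -> rejected (need one)
  #   (REPLACE_DISK_CHG,          "node3", "hail")  -> rejected (not both)
  #   (REPLACE_DISK_PRI/SEC/AUTO, "node3", <any>)   -> rejected (chg only)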


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

8306
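  # At this point the prerequisite checks above have decided the node roles:
  # self.target_node is the node whose storage gets rebuilt, self.other_node
  # is the surviving peer, and self.new_node is only set when a replacement
  # secondary was requested (REPLACE_DISK_CHG); Exec() below dispatches on
  # that distinction.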
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

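  # The mapping returned above is keyed by the DRBD device's iv_name
  # (typically "disk/0", "disk/1", ...) and maps to a tuple of
  # (drbd_dev, old_lvs, new_lvs), where old_lvs/new_lvs are the [data, meta]
  # LV pairs; the detach/rename/attach steps later iterate over it.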
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
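    # Rough sketch of the per-disk rename dance performed below (the names
    # are illustrative only): fresh LVs are created from ".disk<N>_data" /
    # ".disk<N>_meta" based unique names, the old LVs are renamed to
    # <old_name>_replaced-<timestamp>, the new LVs then take over the old
    # names and are attached to the drbd device, and the "_replaced-*"
    # volumes are removed once the device has resynced.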
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
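    # Reminder for the code below: a DRBD8 disk's logical_id is the 6-tuple
    # (node_a, node_b, port, minor_a, minor_b, secret).  A "standalone" id
    # with port=None is used first so the device can be brought up on the
    # new node without networking; the full id (including the port) is what
    # gets written back into the configuration before reconnecting.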
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8686
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8687
                                               self.node_secondary_ip,
8688
                                               self.instance.disks)\
8689
                                              [self.instance.primary_node]
8690

    
8691
    msg = result.fail_msg
8692
    if msg:
8693
      # detaches didn't succeed (unlikely)
8694
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8695
      raise errors.OpExecError("Can't detach the disks from the network on"
8696
                               " old node: %s" % (msg,))
8697

    
8698
    # if we managed to detach at least one, we update all the disks of
8699
    # the instance to point to the new secondary
8700
    self.lu.LogInfo("Updating instance configuration")
8701
    for dev, _, new_logical_id in iv_names.itervalues():
8702
      dev.logical_id = new_logical_id
8703
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8704

    
8705
    self.cfg.Update(self.instance, feedback_fn)
8706

    
8707
    # and now perform the drbd attach
8708
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8709
                    " (standalone => connected)")
8710
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8711
                                            self.new_node],
8712
                                           self.node_secondary_ip,
8713
                                           self.instance.disks,
8714
                                           self.instance.name,
8715
                                           False)
8716
    for to_node, to_result in result.items():
8717
      msg = to_result.fail_msg
8718
      if msg:
8719
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8720
                           to_node, msg,
8721
                           hint=("please do a gnt-instance info to see the"
8722
                                 " status of disks"))
8723
    cstep = 5
8724
    if self.early_release:
8725
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8726
      cstep += 1
8727
      self._RemoveOldStorage(self.target_node, iv_names)
8728
      # WARNING: we release all node locks here, do not do other RPCs
8729
      # than WaitForSync to the primary node
8730
      self._ReleaseNodeLock([self.instance.primary_node,
8731
                             self.target_node,
8732
                             self.new_node])
8733

    
8734
    # Wait for sync
8735
    # This can fail as the old devices are degraded and _WaitForSync
8736
    # does a combined result over all disks, so we don't check its return value
8737
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8738
    cstep += 1
8739
    _WaitForSync(self.lu, self.instance)
8740

    
8741
    # Check all devices manually
8742
    self._CheckDevices(self.instance.primary_node, iv_names)
8743

    
8744
    # Step: remove old storage
8745
    if not self.early_release:
8746
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8747
      self._RemoveOldStorage(self.target_node, iv_names)
8748

    
8749

    
8750
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("ignore_consistency", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

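  # Exec() below returns the evacuation strategy as a list of per-instance
  # entries: [instance_name, new_secondary] pairs built directly when an
  # explicit remote_node was given, otherwise the verbatim "mevac"
  # iallocator result (ial.result).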
  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", ht.NoDefault, ht.TInt),
    ("amount", ht.NoDefault, ht.TInt),
    ("wait_for_sync", True, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature
      # will be supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               {self.disk.physical_id[0]: self.op.amount})

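  # Exec() below activates the disk if necessary, asks every node holding it
  # to grow the block device by self.op.amount, records the new size in the
  # configuration and, unless wait_for_sync was disabled, waits for the
  # mirror to resync before returning.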
  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_PARAMS = [
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("static", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("nics", ht.EmptyList, ht.TList),
    ("disks", ht.EmptyList, ht.TList),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("disk_template", None, ht.TMaybeString),
    ("remote_node", None, ht.TMaybeString),
    ("os_name", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

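  # For reference, self.op.disks and self.op.nics are lists of
  # (operation, params) pairs as validated above; illustrative examples:
  #   [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]  - add a disk
  #   [(0, {"mode": "ro"})]                                - modify disk 0
  #   [(constants.DDM_REMOVE, {})]                         - remove a disk
  # (the params dict is not inspected for removals).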
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

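  # Note: the hook environment built above describes NICs as
  # (ip, mac, mode, link) tuples reflecting the new parameters, with an
  # added NIC appended to the list and a removed NIC dropped from its end.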
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{"size": d.size, "vg": d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

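    # Worked example for the check above (illustrative numbers): raising an
    # instance to 4096 MB of memory while it currently uses 1024 MB and its
    # primary node reports 2048 MB free gives
    #   miss_mem = 4096 - 1024 - 2048 = 1024 > 0,
    # so the change is refused; setting self.op.force skips this whole block.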
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
9570
    """Converts an instance from plain to drbd.
9571

9572
    """
9573
    feedback_fn("Converting template to drbd")
9574
    instance = self.instance
9575
    pnode = instance.primary_node
9576
    snode = self.op.remote_node
9577

    
9578
    # create a fake disk info for _GenerateDiskTemplate
9579
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
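    # Editorial note: disk_info above is a plain list of dicts, e.g.
    # [{"size": 10240, "mode": "rw"}] (illustrative values only); that is all
    # _GenerateDiskTemplate needs to build the matching DRBD disks below.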
9580
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9581
                                      instance.name, pnode, [snode],
9582
                                      disk_info, None, None, 0, feedback_fn)
9583
    info = _GetInstanceInfoText(instance)
9584
    feedback_fn("Creating additional volumes...")
9585
    # first, create the missing data and meta devices
9586
    for disk in new_disks:
9587
      # unfortunately this is... not too nice
9588
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9589
                            info, True)
9590
      for child in disk.children:
9591
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9592
    # at this stage, all new LVs have been created, we can rename the
9593
    # old ones
9594
    feedback_fn("Renaming original volumes...")
9595
    rename_list = [(o, n.children[0].logical_id)
9596
                   for (o, n) in zip(instance.disks, new_disks)]
9597
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9598
    result.Raise("Failed to rename original LVs")
9599

    
9600
    feedback_fn("Initializing DRBD devices...")
9601
    # all child devices are in place, we can now create the DRBD devices
9602
    for disk in new_disks:
9603
      for node in [pnode, snode]:
9604
        f_create = node == pnode
9605
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9606

    
9607
    # at this point, the instance has been modified
9608
    instance.disk_template = constants.DT_DRBD8
9609
    instance.disks = new_disks
9610
    self.cfg.Update(instance, feedback_fn)
9611

    
9612
    # disks are created, waiting for sync
9613
    disk_abort = not _WaitForSync(self, instance)
9614
    if disk_abort:
9615
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please clean up manually")
9617

    
9618
  def _ConvertDrbdToPlain(self, feedback_fn):
9619
    """Converts an instance from drbd to plain.
9620

9621
    """
9622
    instance = self.instance
9623
    assert len(instance.secondary_nodes) == 1
9624
    pnode = instance.primary_node
9625
    snode = instance.secondary_nodes[0]
9626
    feedback_fn("Converting template to plain")
9627

    
9628
    old_disks = instance.disks
9629
    new_disks = [d.children[0] for d in old_disks]
9630

    
9631
    # copy over size and mode
9632
    for parent, child in zip(old_disks, new_disks):
9633
      child.size = parent.size
9634
      child.mode = parent.mode
9635

    
9636
    # update instance structure
9637
    instance.disks = new_disks
9638
    instance.disk_template = constants.DT_PLAIN
9639
    self.cfg.Update(instance, feedback_fn)
9640

    
9641
    feedback_fn("Removing volumes on the secondary node...")
9642
    for disk in old_disks:
9643
      self.cfg.SetDiskID(disk, snode)
9644
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9645
      if msg:
9646
        self.LogWarning("Could not remove block device %s on node %s,"
9647
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9648

    
9649
    feedback_fn("Removing unneeded volumes on the primary node...")
9650
    for idx, disk in enumerate(old_disks):
9651
      meta = disk.children[1]
9652
      self.cfg.SetDiskID(meta, pnode)
9653
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9654
      if msg:
9655
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9656
                        " continuing anyway: %s", idx, pnode, msg)
9657

    
9658
  def Exec(self, feedback_fn):
9659
    """Modifies an instance.
9660

9661
    All parameters take effect only at the next restart of the instance.
9662

9663
    """
9664
    # Process here the warnings from CheckPrereq, as we don't have a
9665
    # feedback_fn there.
9666
    for warn in self.warn:
9667
      feedback_fn("WARNING: %s" % warn)
9668

    
9669
    result = []
9670
    instance = self.instance
9671
    # disk changes
9672
    for disk_op, disk_dict in self.op.disks:
9673
      if disk_op == constants.DDM_REMOVE:
9674
        # remove the last disk
9675
        device = instance.disks.pop()
9676
        device_idx = len(instance.disks)
9677
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9678
          self.cfg.SetDiskID(disk, node)
9679
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9680
          if msg:
9681
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9682
                            " continuing anyway", device_idx, node, msg)
9683
        result.append(("disk/%d" % device_idx, "remove"))
9684
      elif disk_op == constants.DDM_ADD:
9685
        # add a new disk
9686
        if instance.disk_template == constants.DT_FILE:
9687
          file_driver, file_path = instance.disks[0].logical_id
9688
          file_path = os.path.dirname(file_path)
9689
        else:
9690
          file_driver = file_path = None
9691
        disk_idx_base = len(instance.disks)
9692
        new_disk = _GenerateDiskTemplate(self,
9693
                                         instance.disk_template,
9694
                                         instance.name, instance.primary_node,
9695
                                         instance.secondary_nodes,
9696
                                         [disk_dict],
9697
                                         file_path,
9698
                                         file_driver,
9699
                                         disk_idx_base, feedback_fn)[0]
9700
        instance.disks.append(new_disk)
9701
        info = _GetInstanceInfoText(instance)
9702

    
9703
        logging.info("Creating volume %s for instance %s",
9704
                     new_disk.iv_name, instance.name)
9705
        # Note: this needs to be kept in sync with _CreateDisks
9706
        #HARDCODE
9707
        for node in instance.all_nodes:
9708
          f_create = node == instance.primary_node
9709
          try:
9710
            _CreateBlockDev(self, node, instance, new_disk,
9711
                            f_create, info, f_create)
9712
          except errors.OpExecError, err:
9713
            self.LogWarning("Failed to create volume %s (%s) on"
9714
                            " node %s: %s",
9715
                            new_disk.iv_name, new_disk, node, err)
9716
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9717
                       (new_disk.size, new_disk.mode)))
9718
      else:
9719
        # change a given disk
9720
        instance.disks[disk_op].mode = disk_dict['mode']
9721
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9722

    
9723
    if self.op.disk_template:
9724
      r_shut = _ShutdownInstanceDisks(self, instance)
9725
      if not r_shut:
9726
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9727
                                 " proceed with disk template conversion")
9728
      mode = (instance.disk_template, self.op.disk_template)
9729
      try:
9730
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9731
      except:
9732
        self.cfg.ReleaseDRBDMinors(instance.name)
9733
        raise
9734
      result.append(("disk_template", self.op.disk_template))
9735

    
9736
    # NIC changes
9737
    for nic_op, nic_dict in self.op.nics:
9738
      if nic_op == constants.DDM_REMOVE:
9739
        # remove the last nic
9740
        del instance.nics[-1]
9741
        result.append(("nic.%d" % len(instance.nics), "remove"))
9742
      elif nic_op == constants.DDM_ADD:
9743
        # mac and bridge should be set, by now
9744
        mac = nic_dict['mac']
9745
        ip = nic_dict.get('ip', None)
9746
        nicparams = self.nic_pinst[constants.DDM_ADD]
9747
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9748
        instance.nics.append(new_nic)
9749
        result.append(("nic.%d" % (len(instance.nics) - 1),
9750
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9751
                       (new_nic.mac, new_nic.ip,
9752
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9753
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9754
                       )))
9755
      else:
9756
        for key in 'mac', 'ip':
9757
          if key in nic_dict:
9758
            setattr(instance.nics[nic_op], key, nic_dict[key])
9759
        if nic_op in self.nic_pinst:
9760
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9761
        for key, val in nic_dict.iteritems():
9762
          result.append(("nic.%s/%d" % (key, nic_op), val))
9763

    
9764
    # hvparams changes
9765
    if self.op.hvparams:
9766
      instance.hvparams = self.hv_inst
9767
      for key, val in self.op.hvparams.iteritems():
9768
        result.append(("hv/%s" % key, val))
9769

    
9770
    # beparams changes
9771
    if self.op.beparams:
9772
      instance.beparams = self.be_inst
9773
      for key, val in self.op.beparams.iteritems():
9774
        result.append(("be/%s" % key, val))
9775

    
9776
    # OS change
9777
    if self.op.os_name:
9778
      instance.os = self.op.os_name
9779

    
9780
    # osparams changes
9781
    if self.op.osparams:
9782
      instance.osparams = self.os_inst
9783
      for key, val in self.op.osparams.iteritems():
9784
        result.append(("os/%s" % key, val))
9785

    
9786
    self.cfg.Update(instance, feedback_fn)
9787

    
9788
    return result
9789

    
9790
  _DISK_CONVERSIONS = {
9791
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9792
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9793
    }
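  # Editorial sketch (illustrative; mirrors the lookup already done in Exec):
  #
  #   mode = (instance.disk_template, self.op.disk_template)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)
  #
  # Only the two template pairs listed above are supported; other combinations
  # are expected to be rejected during the prerequisite checks.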


class LUQueryExports(NoHooksLU):
9797
  """Query the exports list
9798

9799
  """
9800
  _OP_PARAMS = [
9801
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9802
    ("use_locking", False, ht.TBool),
9803
    ]
9804
  REQ_BGL = False
9805

    
9806
  def ExpandNames(self):
9807
    self.needed_locks = {}
9808
    self.share_locks[locking.LEVEL_NODE] = 1
9809
    if not self.op.nodes:
9810
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9811
    else:
9812
      self.needed_locks[locking.LEVEL_NODE] = \
9813
        _GetWantedNodes(self, self.op.nodes)
9814

    
9815
  def Exec(self, feedback_fn):
9816
    """Compute the list of all the exported system images.
9817

9818
    @rtype: dict
9819
    @return: a dictionary with the structure node->(export-list)
9820
        where export-list is a list of the instances exported on
9821
        that node.
9822

9823
    """
9824
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9825
    rpcresult = self.rpc.call_export_list(self.nodes)
9826
    result = {}
9827
    for node in rpcresult:
9828
      if rpcresult[node].fail_msg:
9829
        result[node] = False
9830
      else:
9831
        result[node] = rpcresult[node].payload
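    # Illustrative shape of the result built above (node names hypothetical):
    #
    #   {"node1.example.com": ["instance1.example.com"],
    #    "node2.example.com": False}
    #
    # where False marks a node whose export list RPC failed.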
    return result
9834

    
9835

    
9836
class LUPrepareExport(NoHooksLU):
9837
  """Prepares an instance for an export and returns useful information.
9838

9839
  """
9840
  _OP_PARAMS = [
9841
    _PInstanceName,
9842
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9843
    ]
9844
  REQ_BGL = False
9845

    
9846
  def ExpandNames(self):
9847
    self._ExpandAndLockInstance()
9848

    
9849
  def CheckPrereq(self):
9850
    """Check prerequisites.
9851

9852
    """
9853
    instance_name = self.op.instance_name
9854

    
9855
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9856
    assert self.instance is not None, \
9857
          "Cannot retrieve locked instance %s" % self.op.instance_name
9858
    _CheckNodeOnline(self, self.instance.primary_node)
9859

    
9860
    self._cds = _GetClusterDomainSecret()
9861

    
9862
  def Exec(self, feedback_fn):
9863
    """Prepares an instance for an export.
9864

9865
    """
9866
    instance = self.instance
9867

    
9868
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9869
      salt = utils.GenerateSecret(8)
9870

    
9871
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9872
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9873
                                              constants.RIE_CERT_VALIDITY)
9874
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9875

    
9876
      (name, cert_pem) = result.payload
9877

    
9878
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9879
                                             cert_pem)
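      # Editorial note (illustrative): the dictionary returned below is meant
      # for the remote-import side; "x509_key_name" is the (name, HMAC, salt)
      # triple that LUExportInstance.CheckPrereq verifies again via
      # utils.VerifySha1Hmac, and "x509_ca" is the certificate signed with the
      # cluster domain secret.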
      return {
9882
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9883
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9884
                          salt),
9885
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9886
        }
9887

    
9888
    return None
9889

    
9890

    
9891
class LUExportInstance(LogicalUnit):
9892
  """Export an instance to an image in the cluster.
9893

9894
  """
9895
  HPATH = "instance-export"
9896
  HTYPE = constants.HTYPE_INSTANCE
9897
  _OP_PARAMS = [
9898
    _PInstanceName,
9899
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9900
    ("shutdown", True, ht.TBool),
9901
    _PShutdownTimeout,
9902
    ("remove_instance", False, ht.TBool),
9903
    ("ignore_remove_failures", False, ht.TBool),
9904
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9905
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9906
    ("destination_x509_ca", None, ht.TMaybeString),
9907
    ]
9908
  REQ_BGL = False
9909

    
9910
  def CheckArguments(self):
9911
    """Check the arguments.
9912

9913
    """
9914
    self.x509_key_name = self.op.x509_key_name
9915
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9916

    
9917
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9918
      if not self.x509_key_name:
9919
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9920
                                   errors.ECODE_INVAL)
9921

    
9922
      if not self.dest_x509_ca_pem:
9923
        raise errors.OpPrereqError("Missing destination X509 CA",
9924
                                   errors.ECODE_INVAL)
9925

    
9926
  def ExpandNames(self):
9927
    self._ExpandAndLockInstance()
9928

    
9929
    # Lock all nodes for local exports
9930
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9931
      # FIXME: lock only instance primary and destination node
9932
      #
9933
      # Sad but true, for now we have to lock all nodes, as we don't know where
9934
      # the previous export might be, and in this LU we search for it and
9935
      # remove it from its current node. In the future we could fix this by:
9936
      #  - making a tasklet to search (share-lock all), then create the
9937
      #    new one, then one to remove, after
9938
      #  - removing the removal operation altogether
9939
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9940

    
9941
  def DeclareLocks(self, level):
9942
    """Last minute lock declaration."""
9943
    # All nodes are locked anyway, so nothing to do here.
9944

    
9945
  def BuildHooksEnv(self):
9946
    """Build hooks env.
9947

9948
    This will run on the master, primary node and target node.
9949

9950
    """
9951
    env = {
9952
      "EXPORT_MODE": self.op.mode,
9953
      "EXPORT_NODE": self.op.target_node,
9954
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9955
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9956
      # TODO: Generic function for boolean env variables
9957
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9958
      }
9959

    
9960
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9961

    
9962
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9963

    
9964
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9965
      nl.append(self.op.target_node)
9966

    
9967
    return env, nl, nl
9968

    
9969
  def CheckPrereq(self):
9970
    """Check prerequisites.
9971

9972
    This checks that the instance and node names are valid.
9973

9974
    """
9975
    instance_name = self.op.instance_name
9976

    
9977
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9978
    assert self.instance is not None, \
9979
          "Cannot retrieve locked instance %s" % self.op.instance_name
9980
    _CheckNodeOnline(self, self.instance.primary_node)
9981

    
9982
    if (self.op.remove_instance and self.instance.admin_up and
9983
        not self.op.shutdown):
9984
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first", errors.ECODE_INVAL)
9986

    
9987
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9988
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9989
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9990
      assert self.dst_node is not None
9991

    
9992
      _CheckNodeOnline(self, self.dst_node.name)
9993
      _CheckNodeNotDrained(self, self.dst_node.name)
9994

    
9995
      self._cds = None
9996
      self.dest_disk_info = None
9997
      self.dest_x509_ca = None
9998

    
9999
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10000
      self.dst_node = None
10001

    
10002
      if len(self.op.target_node) != len(self.instance.disks):
10003
        raise errors.OpPrereqError(("Received destination information for %s"
10004
                                    " disks, but instance %s has %s disks") %
10005
                                   (len(self.op.target_node), instance_name,
10006
                                    len(self.instance.disks)),
10007
                                   errors.ECODE_INVAL)
10008

    
10009
      cds = _GetClusterDomainSecret()
10010

    
10011
      # Check X509 key name
10012
      try:
10013
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10014
      except (TypeError, ValueError), err:
10015
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10016

    
10017
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10018
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10019
                                   errors.ECODE_INVAL)
10020

    
10021
      # Load and verify CA
10022
      try:
10023
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10024
      except OpenSSL.crypto.Error, err:
10025
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10026
                                   (err, ), errors.ECODE_INVAL)
10027

    
10028
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10029
      if errcode is not None:
10030
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10031
                                   (msg, ), errors.ECODE_INVAL)
10032

    
10033
      self.dest_x509_ca = cert
10034

    
10035
      # Verify target information
10036
      disk_info = []
10037
      for idx, disk_data in enumerate(self.op.target_node):
10038
        try:
10039
          (host, port, magic) = \
10040
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10041
        except errors.GenericError, err:
10042
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10043
                                     (idx, err), errors.ECODE_INVAL)
10044

    
10045
        disk_info.append((host, port, magic))
10046

    
10047
      assert len(disk_info) == len(self.op.target_node)
10048
      self.dest_disk_info = disk_info
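      # Illustrative: dest_disk_info now holds one (host, port, magic) tuple
      # per instance disk, in disk index order.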
    else:
10051
      raise errors.ProgrammerError("Unhandled export mode %r" %
10052
                                   self.op.mode)
10053

    
10054
    # instance disk type verification
10055
    # TODO: Implement export support for file-based disks
10056
    for disk in self.instance.disks:
10057
      if disk.dev_type == constants.LD_FILE:
10058
        raise errors.OpPrereqError("Export not supported for instances with"
10059
                                   " file-based disks", errors.ECODE_INVAL)
10060

    
10061
  def _CleanupExports(self, feedback_fn):
10062
    """Removes exports of current instance from all other nodes.
10063

10064
    If an instance in a cluster with nodes A..D was exported to node C, its
10065
    exports will be removed from the nodes A, B and D.
10066

10067
    """
10068
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10069

    
10070
    nodelist = self.cfg.GetNodeList()
10071
    nodelist.remove(self.dst_node.name)
10072

    
10073
    # on one-node clusters nodelist will be empty after the removal
10074
    # if we proceed the backup would be removed because OpQueryExports
10075
    # substitutes an empty list with the full cluster node list.
10076
    iname = self.instance.name
10077
    if nodelist:
10078
      feedback_fn("Removing old exports for instance %s" % iname)
10079
      exportlist = self.rpc.call_export_list(nodelist)
10080
      for node in exportlist:
10081
        if exportlist[node].fail_msg:
10082
          continue
10083
        if iname in exportlist[node].payload:
10084
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10085
          if msg:
10086
            self.LogWarning("Could not remove older export for instance %s"
10087
                            " on node %s: %s", iname, node, msg)
10088

    
10089
  def Exec(self, feedback_fn):
10090
    """Export an instance to an image in the cluster.
10091

10092
    """
10093
    assert self.op.mode in constants.EXPORT_MODES
10094

    
10095
    instance = self.instance
10096
    src_node = instance.primary_node
10097

    
10098
    if self.op.shutdown:
10099
      # shutdown the instance, but not the disks
10100
      feedback_fn("Shutting down instance %s" % instance.name)
10101
      result = self.rpc.call_instance_shutdown(src_node, instance,
10102
                                               self.op.shutdown_timeout)
10103
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10104
      result.Raise("Could not shutdown instance %s on"
10105
                   " node %s" % (instance.name, src_node))
10106

    
10107
    # set the disks ID correctly since call_instance_start needs the
10108
    # correct drbd minor to create the symlinks
10109
    for disk in instance.disks:
10110
      self.cfg.SetDiskID(disk, src_node)
10111

    
10112
    activate_disks = (not instance.admin_up)
10113

    
10114
    if activate_disks:
10115
      # Activate the instance disks if we're exporting a stopped instance
10116
      feedback_fn("Activating disks for %s" % instance.name)
10117
      _StartInstanceDisks(self, instance, None)
10118

    
10119
    try:
10120
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10121
                                                     instance)
10122

    
10123
      helper.CreateSnapshots()
10124
      try:
10125
        if (self.op.shutdown and instance.admin_up and
10126
            not self.op.remove_instance):
10127
          assert not activate_disks
10128
          feedback_fn("Starting instance %s" % instance.name)
10129
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10130
          msg = result.fail_msg
10131
          if msg:
10132
            feedback_fn("Failed to start instance: %s" % msg)
10133
            _ShutdownInstanceDisks(self, instance)
10134
            raise errors.OpExecError("Could not start instance: %s" % msg)
10135

    
10136
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10137
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10138
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10139
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10140
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10141

    
10142
          (key_name, _, _) = self.x509_key_name
10143

    
10144
          dest_ca_pem = \
10145
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10146
                                            self.dest_x509_ca)
10147

    
10148
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10149
                                                     key_name, dest_ca_pem,
10150
                                                     timeouts)
10151
      finally:
10152
        helper.Cleanup()
10153

    
10154
      # Check for backwards compatibility
10155
      assert len(dresults) == len(instance.disks)
10156
      assert compat.all(isinstance(i, bool) for i in dresults), \
10157
             "Not all results are boolean: %r" % dresults
10158

    
10159
    finally:
10160
      if activate_disks:
10161
        feedback_fn("Deactivating disks for %s" % instance.name)
10162
        _ShutdownInstanceDisks(self, instance)
10163

    
10164
    if not (compat.all(dresults) and fin_resu):
10165
      failures = []
10166
      if not fin_resu:
10167
        failures.append("export finalization")
10168
      if not compat.all(dresults):
10169
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10170
                               if not dsk)
10171
        failures.append("disk export: disk(s) %s" % fdsk)
10172

    
10173
      raise errors.OpExecError("Export failed, errors in %s" %
10174
                               utils.CommaJoin(failures))
10175

    
10176
    # At this point, the export was successful, we can cleanup/finish
10177

    
10178
    # Remove instance if requested
10179
    if self.op.remove_instance:
10180
      feedback_fn("Removing instance %s" % instance.name)
10181
      _RemoveInstance(self, feedback_fn, instance,
10182
                      self.op.ignore_remove_failures)
10183

    
10184
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10185
      self._CleanupExports(feedback_fn)
10186

    
10187
    return fin_resu, dresults
10188

    
10189

    
10190
class LURemoveExport(NoHooksLU):
10191
  """Remove exports related to the named instance.
10192

10193
  """
10194
  _OP_PARAMS = [
10195
    _PInstanceName,
10196
    ]
10197
  REQ_BGL = False
10198

    
10199
  def ExpandNames(self):
10200
    self.needed_locks = {}
10201
    # We need all nodes to be locked in order for RemoveExport to work, but we
10202
    # don't need to lock the instance itself, as nothing will happen to it (and
10203
    # we can remove exports also for a removed instance)
10204
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10205

    
10206
  def Exec(self, feedback_fn):
10207
    """Remove any export.
10208

10209
    """
10210
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10211
    # If the instance was not found we'll try with the name that was passed in.
10212
    # This will only work if it was an FQDN, though.
10213
    fqdn_warn = False
10214
    if not instance_name:
10215
      fqdn_warn = True
10216
      instance_name = self.op.instance_name
10217

    
10218
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10219
    exportlist = self.rpc.call_export_list(locked_nodes)
10220
    found = False
10221
    for node in exportlist:
10222
      msg = exportlist[node].fail_msg
10223
      if msg:
10224
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10225
        continue
10226
      if instance_name in exportlist[node].payload:
10227
        found = True
10228
        result = self.rpc.call_export_remove(node, instance_name)
10229
        msg = result.fail_msg
10230
        if msg:
10231
          logging.error("Could not remove export for instance %s"
10232
                        " on node %s: %s", instance_name, node, msg)
10233

    
10234
    if fqdn_warn and not found:
10235
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance, please use its Fully Qualified"
10237
                  " Domain Name.")
10238

    
10239

    
10240
class LUAddGroup(LogicalUnit):
10241
  """Logical unit for creating node groups.
10242

10243
  """
10244
  HPATH = "group-add"
10245
  HTYPE = constants.HTYPE_GROUP
10246

    
10247
  _OP_PARAMS = [
10248
    _PGroupName,
10249
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10250
    ("alloc_policy", None, ht.TOr(ht.TNone,
10251
                                  ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10252
    ]
10253

    
10254
  REQ_BGL = False
10255

    
10256
  def ExpandNames(self):
10257
    # We need the new group's UUID here so that we can create and acquire the
10258
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10259
    # that it should not check whether the UUID exists in the configuration.
10260
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10261
    self.needed_locks = {}
10262
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10263

    
10264
  def CheckPrereq(self):
10265
    """Check prerequisites.
10266

10267
    This checks that the given group name is not an existing node group
10268
    already.
10269

10270
    """
10271
    try:
10272
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10273
    except errors.OpPrereqError:
10274
      pass
10275
    else:
10276
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10277
                                 " node group (UUID: %s)" %
10278
                                 (self.op.group_name, existing_uuid),
10279
                                 errors.ECODE_EXISTS)
10280

    
10281
    if self.op.ndparams:
10282
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10283

    
10284
  def BuildHooksEnv(self):
10285
    """Build hooks env.
10286

10287
    """
10288
    env = {
10289
      "GROUP_NAME": self.op.group_name,
10290
      }
10291
    mn = self.cfg.GetMasterNode()
10292
    return env, [mn], [mn]
10293

    
10294
  def Exec(self, feedback_fn):
10295
    """Add the node group to the cluster.
10296

10297
    """
10298
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10299
                                  uuid=self.group_uuid,
10300
                                  alloc_policy=self.op.alloc_policy,
10301
                                  ndparams=self.op.ndparams)
10302

    
10303
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10304
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10305

    
10306

    
10307
class LUQueryGroups(NoHooksLU):
10308
  """Logical unit for querying node groups.
10309

10310
  """
10311
  # pylint: disable-msg=W0142
10312
  _OP_PARAMS = [
10313
    _POutputFields,
10314
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10315
    ]
10316

    
10317
  REQ_BGL = False
10318

    
10319
  _FIELDS_DYNAMIC = utils.FieldSet()
10320

    
10321
  _SIMPLE_FIELDS = ["name", "uuid", "alloc_policy",
10322
                    "ctime", "mtime", "serial_no"]
10323

    
10324
  _FIELDS_STATIC = utils.FieldSet(
10325
      "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10326

    
10327
  def CheckArguments(self):
10328
    _CheckOutputFields(static=self._FIELDS_STATIC,
10329
                       dynamic=self._FIELDS_DYNAMIC,
10330
                       selected=self.op.output_fields)
10331

    
10332
  def ExpandNames(self):
10333
    self.needed_locks = {}
10334

    
10335
  def Exec(self, feedback_fn):
10336
    """Computes the list of groups and their attributes.
10337

10338
    """
10339
    all_groups = self.cfg.GetAllNodeGroupsInfo()
10340
    name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10341

    
10342
    if not self.op.names:
10343
      sorted_names = utils.NiceSort(name_to_uuid.keys())
10344
      my_groups = [name_to_uuid[n] for n in sorted_names]
10345
    else:
10346
      # Accept names to be either names or UUIDs.
10347
      all_uuid = frozenset(all_groups.keys())
10348
      my_groups = []
10349
      missing = []
10350

    
10351
      for name in self.op.names:
10352
        if name in all_uuid:
10353
          my_groups.append(name)
10354
        elif name in name_to_uuid:
10355
          my_groups.append(name_to_uuid[name])
10356
        else:
10357
          missing.append(name)
10358

    
10359
      if missing:
10360
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10361
                                   errors.ECODE_NOENT)
10362

    
10363
    do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10364
                    intersection(self.op.output_fields))
10365

    
10366
    do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10367
                        intersection(self.op.output_fields))
10368

    
10369
    # We need to map group->[nodes], and group->[instances]. The former is
10370
    # directly attainable, but the latter we have to do through instance->node,
10371
    # hence we need to process nodes even if we only need instance information.
10372
    if do_nodes or do_instances:
10373
      all_nodes = self.cfg.GetAllNodesInfo()
10374
      group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10375
      node_to_group = {}
10376

    
10377
      for node in all_nodes.values():
10378
        if node.group in group_to_nodes:
10379
          group_to_nodes[node.group].append(node.name)
10380
          node_to_group[node.name] = node.group
10381

    
10382
      if do_instances:
10383
        all_instances = self.cfg.GetAllInstancesInfo()
10384
        group_to_instances = dict((all_groups[name].uuid, [])
10385
                                  for name in my_groups)
10386
        for instance in all_instances.values():
10387
          node = instance.primary_node
10388
          if node in node_to_group:
10389
            group_to_instances[node_to_group[node]].append(instance.name)
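    # Illustrative shapes of the maps built above (names are hypothetical):
    #
    #   group_to_nodes     = {"<group uuid>": ["node1", "node2"]}
    #   group_to_instances = {"<group uuid>": ["instance1"]}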
    output = []
10392

    
10393
    for uuid in my_groups:
10394
      group = all_groups[uuid]
10395
      group_output = []
10396

    
10397
      for field in self.op.output_fields:
10398
        if field in self._SIMPLE_FIELDS:
10399
          val = getattr(group, field)
10400
        elif field == "node_list":
10401
          val = utils.NiceSort(group_to_nodes[group.uuid])
10402
        elif field == "node_cnt":
10403
          val = len(group_to_nodes[group.uuid])
10404
        elif field == "pinst_list":
10405
          val = utils.NiceSort(group_to_instances[group.uuid])
10406
        elif field == "pinst_cnt":
10407
          val = len(group_to_instances[group.uuid])
10408
        else:
10409
          raise errors.ParameterError(field)
10410
        group_output.append(val)
10411
      output.append(group_output)
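    # Illustrative: with output_fields == ["name", "node_cnt"] the result is
    # one row per requested group, e.g. [["default", 3]] (hypothetical data).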
    return output
10414

    
10415

    
10416
class LUSetGroupParams(LogicalUnit):
10417
  """Modifies the parameters of a node group.
10418

10419
  """
10420
  HPATH = "group-modify"
10421
  HTYPE = constants.HTYPE_GROUP
10422

    
10423
  _OP_PARAMS = [
10424
    _PGroupName,
10425
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
10426
    ("alloc_policy", None, ht.TOr(ht.TNone,
10427
                                  ht.TElemOf(constants.VALID_ALLOC_POLICIES))),
10428
    ]
10429

    
10430
  REQ_BGL = False
10431

    
10432
  def CheckArguments(self):
10433
    all_changes = [
10434
      self.op.ndparams,
10435
      self.op.alloc_policy,
10436
      ]
10437

    
10438
    if all_changes.count(None) == len(all_changes):
10439
      raise errors.OpPrereqError("Please pass at least one modification",
10440
                                 errors.ECODE_INVAL)
10441

    
10442
  def ExpandNames(self):
10443
    # This raises errors.OpPrereqError on its own:
10444
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10445

    
10446
    self.needed_locks = {
10447
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10448
      }
10449

    
10450
  def CheckPrereq(self):
10451
    """Check prerequisites.
10452

10453
    """
10454
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10455

    
10456
    if self.group is None:
10457
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10458
                               (self.op.group_name, self.group_uuid))
10459

    
10460
    if self.op.ndparams:
10461
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10462
      self.new_ndparams = self.group.SimpleFillND(self.op.ndparams)
10463

    
10464
  def BuildHooksEnv(self):
10465
    """Build hooks env.
10466

10467
    """
10468
    env = {
10469
      "GROUP_NAME": self.op.group_name,
10470
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10471
      }
10472
    mn = self.cfg.GetMasterNode()
10473
    return env, [mn], [mn]
10474

    
10475
  def Exec(self, feedback_fn):
10476
    """Modifies the node group.
10477

10478
    """
10479
    result = []
10480

    
10481
    if self.op.ndparams:
10482
      self.group.ndparams = self.new_ndparams
10483
      result.append(("ndparams", str(self.group.ndparams)))
10484

    
10485
    if self.op.alloc_policy:
10486
      self.group.alloc_policy = self.op.alloc_policy
10487

    
10488
    self.cfg.Update(self.group, feedback_fn)
10489
    return result
10490

    
10491

    
10492

    
10493
class LURemoveGroup(LogicalUnit):
10494
  HPATH = "group-remove"
10495
  HTYPE = constants.HTYPE_GROUP
10496

    
10497
  _OP_PARAMS = [
10498
    _PGroupName,
10499
    ]
10500

    
10501
  REQ_BGL = False
10502

    
10503
  def ExpandNames(self):
10504
    # This raises errors.OpPrereqError on its own:
10505
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10506
    self.needed_locks = {
10507
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10508
      }
10509

    
10510
  def CheckPrereq(self):
10511
    """Check prerequisites.
10512

10513
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
10516

10517
    """
10518
    # Verify that the group is empty.
10519
    group_nodes = [node.name
10520
                   for node in self.cfg.GetAllNodesInfo().values()
10521
                   if node.group == self.group_uuid]
10522

    
10523
    if group_nodes:
10524
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10525
                                 " nodes: %s" %
10526
                                 (self.op.group_name,
10527
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10528
                                 errors.ECODE_STATE)
10529

    
10530
    # Verify the cluster would not be left group-less.
10531
    if len(self.cfg.GetNodeGroupList()) == 1:
10532
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10533
                                 " which cannot be left without at least one"
10534
                                 " group" % self.op.group_name,
10535
                                 errors.ECODE_STATE)
10536

    
10537
  def BuildHooksEnv(self):
10538
    """Build hooks env.
10539

10540
    """
10541
    env = {
10542
      "GROUP_NAME": self.op.group_name,
10543
      }
10544
    mn = self.cfg.GetMasterNode()
10545
    return env, [mn], [mn]
10546

    
10547
  def Exec(self, feedback_fn):
10548
    """Remove the node group.
10549

10550
    """
10551
    try:
10552
      self.cfg.RemoveNodeGroup(self.group_uuid)
10553
    except errors.ConfigurationError:
10554
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10555
                               (self.op.group_name, self.group_uuid))
10556

    
10557
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10558

    
10559

    
10560
class LURenameGroup(LogicalUnit):
10561
  HPATH = "group-rename"
10562
  HTYPE = constants.HTYPE_GROUP
10563

    
10564
  _OP_PARAMS = [
10565
    ("old_name", ht.NoDefault, ht.TNonEmptyString),
10566
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
10567
    ]
10568

    
10569
  REQ_BGL = False
10570

    
10571
  def ExpandNames(self):
10572
    # This raises errors.OpPrereqError on its own:
10573
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10574

    
10575
    self.needed_locks = {
10576
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10577
      }
10578

    
10579
  def CheckPrereq(self):
10580
    """Check prerequisites.
10581

10582
    This checks that the given old_name exists as a node group, and that
10583
    new_name doesn't.
10584

10585
    """
10586
    try:
10587
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10588
    except errors.OpPrereqError:
10589
      pass
10590
    else:
10591
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10592
                                 " node group (UUID: %s)" %
10593
                                 (self.op.new_name, new_name_uuid),
10594
                                 errors.ECODE_EXISTS)
10595

    
10596
  def BuildHooksEnv(self):
10597
    """Build hooks env.
10598

10599
    """
10600
    env = {
10601
      "OLD_NAME": self.op.old_name,
10602
      "NEW_NAME": self.op.new_name,
10603
      }
10604

    
10605
    mn = self.cfg.GetMasterNode()
10606
    all_nodes = self.cfg.GetAllNodesInfo()
10607
    run_nodes = [mn]
10608
    all_nodes.pop(mn, None)
10609

    
10610
    for node in all_nodes.values():
10611
      if node.group == self.group_uuid:
10612
        run_nodes.append(node.name)
10613

    
10614
    return env, run_nodes, run_nodes
10615

    
10616
  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.old_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
10630

    
10631

    
10632
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10633
  """Generic tags LU.
10634

10635
  This is an abstract class which is the parent of all the other tags LUs.
10636

10637
  """
10638

    
10639
  def ExpandNames(self):
10640
    self.needed_locks = {}
10641
    if self.op.kind == constants.TAG_NODE:
10642
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10643
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10644
    elif self.op.kind == constants.TAG_INSTANCE:
10645
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10646
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10647

    
10648
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10649
    # not possible to acquire the BGL based on opcode parameters)
10650

    
10651
  def CheckPrereq(self):
10652
    """Check prerequisites.
10653

10654
    """
10655
    if self.op.kind == constants.TAG_CLUSTER:
10656
      self.target = self.cfg.GetClusterInfo()
10657
    elif self.op.kind == constants.TAG_NODE:
10658
      self.target = self.cfg.GetNodeInfo(self.op.name)
10659
    elif self.op.kind == constants.TAG_INSTANCE:
10660
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10661
    else:
10662
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10663
                                 str(self.op.kind), errors.ECODE_INVAL)
10664

    
10665

    
10666
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
10688

    
10689

    
10690
class LUSearchTags(NoHooksLU):
10691
  """Searches the tags for a given pattern.
10692

10693
  """
10694
  _OP_PARAMS = [
10695
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10696
    ]
10697
  REQ_BGL = False
10698

    
10699
  def ExpandNames(self):
10700
    self.needed_locks = {}
10701

    
10702
  def CheckPrereq(self):
10703
    """Check prerequisites.
10704

10705
    This checks the pattern passed for validity by compiling it.
10706

10707
    """
10708
    try:
10709
      self.re = re.compile(self.op.pattern)
10710
    except re.error, err:
10711
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10712
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10713

    
10714
  def Exec(self, feedback_fn):
10715
    """Returns the tag list.
10716

10717
    """
10718
    cfg = self.cfg
10719
    tgts = [("/cluster", cfg.GetClusterInfo())]
10720
    ilist = cfg.GetAllInstancesInfo().values()
10721
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10722
    nlist = cfg.GetAllNodesInfo().values()
10723
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10724
    results = []
10725
    for path, target in tgts:
10726
      for tag in target.GetTags():
10727
        if self.re.search(tag):
10728
          results.append((path, tag))
10729
    return results
10730

    
10731

    
10732
class LUAddTags(TagsLU):
10733
  """Sets a tag on a given object.
10734

10735
  """
10736
  _OP_PARAMS = [
10737
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10738
    # Name is only meaningful for nodes and instances
10739
    ("name", ht.NoDefault, ht.TMaybeString),
10740
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10741
    ]
10742
  REQ_BGL = False
10743

    
10744
  def CheckPrereq(self):
10745
    """Check prerequisites.
10746

10747
    This checks the type and length of the tag name and value.
10748

10749
    """
10750
    TagsLU.CheckPrereq(self)
10751
    for tag in self.op.tags:
10752
      objects.TaggableObject.ValidateTag(tag)
10753

    
10754
  def Exec(self, feedback_fn):
10755
    """Sets the tag.
10756

10757
    """
10758
    try:
10759
      for tag in self.op.tags:
10760
        self.target.AddTag(tag)
10761
    except errors.TagError, err:
10762
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10763
    self.cfg.Update(self.target, feedback_fn)
10764

    
10765

    
10766
class LUDelTags(TagsLU):
10767
  """Delete a list of tags from a given object.
10768

10769
  """
10770
  _OP_PARAMS = [
10771
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10772
    # Name is only meaningful for nodes and instances
10773
    ("name", ht.NoDefault, ht.TMaybeString),
10774
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10775
    ]
10776
  REQ_BGL = False
10777

    
10778
  def CheckPrereq(self):
10779
    """Check prerequisites.
10780

10781
    This checks that we have the given tag.
10782

10783
    """
10784
    TagsLU.CheckPrereq(self)
10785
    for tag in self.op.tags:
10786
      objects.TaggableObject.ValidateTag(tag)
10787
    del_tags = frozenset(self.op.tags)
10788
    cur_tags = self.target.GetTags()
10789

    
10790
    diff_tags = del_tags - cur_tags
10791
    if diff_tags:
10792
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10793
      raise errors.OpPrereqError("Tag(s) %s not found" %
10794
                                 (utils.CommaJoin(diff_names), ),
10795
                                 errors.ECODE_NOENT)
10796

    
10797
  def Exec(self, feedback_fn):
10798
    """Remove the tag from the object.
10799

10800
    """
10801
    for tag in self.op.tags:
10802
      self.target.RemoveTag(tag)
10803
    self.cfg.Update(self.target, feedback_fn)
10804

    
10805

    
10806
class LUTestDelay(NoHooksLU):
10807
  """Sleep for a specified amount of time.
10808

10809
  This LU sleeps on the master and/or nodes for a specified amount of
10810
  time.
10811

10812
  """
10813
  _OP_PARAMS = [
10814
    ("duration", ht.NoDefault, ht.TFloat),
10815
    ("on_master", True, ht.TBool),
10816
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10817
    ("repeat", 0, ht.TPositiveInt)
10818
    ]
10819
  REQ_BGL = False
10820

    
10821
  def ExpandNames(self):
10822
    """Expand names and set required locks.
10823

10824
    This expands the node list, if any.
10825

10826
    """
10827
    self.needed_locks = {}
10828
    if self.op.on_nodes:
10829
      # _GetWantedNodes can be used here, but is not always appropriate to use
10830
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10831
      # more information.
10832
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10833
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10834

    
10835
  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
10846

    
10847
  def Exec(self, feedback_fn):
10848
    """Execute the test delay opcode, with the wanted repetitions.
10849

10850
    """
10851
    if self.op.repeat == 0:
10852
      self._TestDelay()
10853
    else:
10854
      top_value = self.op.repeat - 1
10855
      for i in range(self.op.repeat):
10856
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10857
        self._TestDelay()
10858

    
10859

    
10860
class LUTestJobqueue(NoHooksLU):
10861
  """Utility LU to test some aspects of the job queue.
10862

10863
  """
10864
  _OP_PARAMS = [
10865
    ("notify_waitlock", False, ht.TBool),
10866
    ("notify_exec", False, ht.TBool),
10867
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10868
    ("fail", False, ht.TBool),
10869
    ]
10870
  REQ_BGL = False
10871

    
10872
  # Must be lower than default timeout for WaitForJobChange to see whether it
10873
  # notices changed jobs
10874
  _CLIENT_CONNECT_TIMEOUT = 20.0
10875
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10876

    
10877
  @classmethod
10878
  def _NotifyUsingSocket(cls, cb, errcls):
10879
    """Opens a Unix socket and waits for another program to connect.
10880

10881
    @type cb: callable
10882
    @param cb: Callback to send socket name to client
10883
    @type errcls: class
10884
    @param errcls: Exception class to use for errors
10885

10886
    """
10887
    # Using a temporary directory as there's no easy way to create temporary
10888
    # sockets without writing a custom loop around tempfile.mktemp and
10889
    # socket.bind
10890
    tmpdir = tempfile.mkdtemp()
10891
    try:
10892
      tmpsock = utils.PathJoin(tmpdir, "sock")
10893

    
10894
      logging.debug("Creating temporary socket at %s", tmpsock)
10895
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10896
      try:
10897
        sock.bind(tmpsock)
10898
        sock.listen(1)
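        # Editorial sketch of the expected client side (illustrative only;
        # "path_received_via_cb" is a placeholder name):
        #
        #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        #   client.connect(path_received_via_cb)  # unblocks sock.accept()
        #   ...                                   # a later send or close
        #                                         # satisfies conn.recv(1)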
        # Send details to client
10901
        cb(tmpsock)
10902

    
10903
        # Wait for client to connect before continuing
10904
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10905
        try:
10906
          (conn, _) = sock.accept()
10907
        except socket.error, err:
10908
          raise errcls("Client didn't connect in time (%s)" % err)
10909
      finally:
10910
        sock.close()
10911
    finally:
10912
      # Remove as soon as client is connected
10913
      shutil.rmtree(tmpdir)
10914

    
10915
    # Wait for client to close
10916
    try:
10917
      try:
10918
        # pylint: disable-msg=E1101
10919
        # Instance of '_socketobject' has no ... member
10920
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10921
        conn.recv(1)
10922
      except socket.error, err:
10923
        raise errcls("Client failed to confirm notification (%s)" % err)
10924
    finally:
10925
      conn.close()
10926

    
10927
  def _SendNotification(self, test, arg, sockname):
10928
    """Sends a notification to the client.
10929

10930
    @type test: string
10931
    @param test: Test name
10932
    @param arg: Test argument (depends on test)
10933
    @type sockname: string
10934
    @param sockname: Socket path
10935

10936
    """
10937
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10938

    
10939
  def _Notify(self, prereq, test, arg):
10940
    """Notifies the client of a test.
10941

10942
    @type prereq: bool
10943
    @param prereq: Whether this is a prereq-phase test
10944
    @type test: string
10945
    @param test: Test name
10946
    @param arg: Test argument (depends on test)
10947

10948
    """
10949
    if prereq:
10950
      errcls = errors.OpPrereqError
10951
    else:
10952
      errcls = errors.OpExecError
10953

    
10954
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10955
                                                  test, arg),
10956
                                   errcls)
10957

    
10958
  def CheckArguments(self):
10959
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10960
    self.expandnames_calls = 0
10961

    
10962
  def ExpandNames(self):
10963
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10964
    if checkargs_calls < 1:
10965
      raise errors.ProgrammerError("CheckArguments was not called")
10966

    
10967
    self.expandnames_calls += 1
10968

    
10969
    if self.op.notify_waitlock:
10970
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10971

    
10972
    self.LogInfo("Expanding names")
10973

    
10974
    # Get lock on master node (just to get a lock, not for a particular reason)
10975
    self.needed_locks = {
10976
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10977
      }
10978

    
10979
  def Exec(self, feedback_fn):
10980
    if self.expandnames_calls < 1:
10981
      raise errors.ProgrammerError("ExpandNames was not called")
10982

    
10983
    if self.op.notify_exec:
10984
      self._Notify(False, constants.JQT_EXEC, None)
10985

    
10986
    self.LogInfo("Executing")
10987

    
10988
    if self.op.log_messages:
10989
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10990
      for idx, msg in enumerate(self.op.log_messages):
10991
        self.LogInfo("Sending log message %s", idx + 1)
10992
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10993
        # Report how many test messages have been sent
10994
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10995

    
10996
    if self.op.fail:
10997
      raise errors.OpExecError("Opcode failure was requested")
10998

    
10999
    return True
11000

    
11001

    
11002
class IAllocator(object):
11003
  """IAllocator framework.
11004

11005
  An IAllocator instance has four sets of attributes:
11006
    - cfg that is needed to query the cluster
11007
    - input data (all members of the _KEYS class attribute are required)
11008
    - four buffer attributes (in|out_data|text), that represent the
11009
      input (to the external script) in text and data structure format,
11010
      and the output from it, again in two formats
11011
    - the result variables from the script (success, info, nodes) for
11012
      easy usage
11013

11014
  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
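    # At this point self.in_data carries the operation-independent keys
    # ("version", "cluster_name", "cluster_tags", "enabled_hypervisors",
    # "nodegroups", "nodes", "instances"); _BuildInputData later adds the
    # mode-specific "request" entry before serializing to self.in_text.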

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    """
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    return node_results
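    # Note on the loop above: the hypervisor-reported 'memory_free' is
    # reduced by max(0, configured BE_MEMORY - currently used memory) for
    # each primary instance, so the free memory passed to the allocator
    # already accounts for instances that may grow back to their configured
    # size.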

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
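    # Illustrative shape of the allocation request built above (all values
    # are made up for the example):
    #   {"name": "inst1.example.com", "disk_template": "drbd", "tags": [],
    #    "os": "debian-image", "vcpus": 1, "memory": 512,
    #    "disks": [{"size": 1024, "mode": "w"}], "disk_space_total": 1152,
    #    "nics": [{"mac": "auto", "ip": None, "bridge": None}],
    #    "required_nodes": 2}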

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
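    # Illustrative shape of the relocation request built above (made-up
    # values):
    #   {"name": "inst1.example.com", "disk_space_total": 1152,
    #    "required_nodes": 1, "relocate_from": ["node2.example.com"]}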

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
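    # Illustrative well-formed allocator reply accepted by the checks above
    # (values are made up):
    #   {"success": true, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}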


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result