#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
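
  # Illustrative sketch: an LU that acquired instance locks in ExpandNames and
  # now needs the corresponding node locks would typically implement this as
  # (following the pattern documented in _LockInstancesNodes below):
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()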

    
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have the 'GANETI_' prefix, as this will
    be handled by the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes for a phase, return an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
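
  # Illustrative sketch of a typical BuildHooksEnv implementation, modelled on
  # LUPostInitCluster.BuildHooksEnv later in this module: return the
  # environment dict plus the pre- and post-execution node lists, e.g.
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.cfg.GetClusterName()}
  #     return env, [], [self.cfg.GetMasterNode()]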

    
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the 'unused argument' and 'could
    # be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError

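
# Illustrative sketch of how an LU can delegate its work to tasklets instead
# of implementing CheckPrereq/Exec itself: ExpandNames assigns the (ordered)
# list to self.tasklets, and LogicalUnit.CheckPrereq/Exec then run each
# tasklet in turn. The tasklet class name below is hypothetical.
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [_SomeMigrationTasklet(self, self.op.instance_name)]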

    
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

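
# Illustrative example of _GetUpdatedParams above: merging {"a": 1, "b": 2}
# with {"a": constants.VALUE_DEFAULT, "c": 3} (use_default=True) drops "a"
# and yields {"b": 2, "c": 3}; a None value is only treated the same way
# when use_none=True.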

    
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env

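
# For example, _BuildInstanceHookEnv above turns a single-NIC, single-disk
# instance into environment entries such as INSTANCE_PRIMARY, INSTANCE_STATUS,
# INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
# INSTANCE_DISK_COUNT=1 and INSTANCE_DISK0_SIZE, plus one INSTANCE_BE_* and
# INSTANCE_HV_* entry per backend/hypervisor parameter.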

    
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
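
  # Illustrative example of the two output formats produced by _Error above:
  # with the opcode's error_codes parameter set, the message is
  # machine-parseable, e.g.
  #   ERROR:ENODELVM:node:node1.example.tld:unable to check volume groups
  # while the default form reads
  #   ERROR: node node1.example.tld: unable to check volume groups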

    
1212
  def _VerifyNode(self, ninfo, nresult):
1213
    """Run multiple tests against a node.
1214

1215
    Test list:
1216

1217
      - compares ganeti version
1218
      - checks vg existence and size > 20G
1219
      - checks config file checksum
1220
      - checks ssh to other nodes
1221

1222
    @type ninfo: L{objects.Node}
1223
    @param ninfo: the node to check
1224
    @param nresult: the results from the node
1225
    @rtype: boolean
1226
    @return: whether overall this call was successful (and we can expect
1227
         reasonable values in the respose)
1228

1229
    """
1230
    node = ninfo.name
1231
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1232

    
1233
    # main result, nresult should be a non-empty dict
1234
    test = not nresult or not isinstance(nresult, dict)
1235
    _ErrorIf(test, self.ENODERPC, node,
1236
                  "unable to verify node: no data returned")
1237
    if test:
1238
      return False
1239

    
1240
    # compares ganeti version
1241
    local_version = constants.PROTOCOL_VERSION
1242
    remote_version = nresult.get("version", None)
1243
    test = not (remote_version and
1244
                isinstance(remote_version, (list, tuple)) and
1245
                len(remote_version) == 2)
1246
    _ErrorIf(test, self.ENODERPC, node,
1247
             "connection to node returned invalid data")
1248
    if test:
1249
      return False
1250

    
1251
    test = local_version != remote_version[0]
1252
    _ErrorIf(test, self.ENODEVERSION, node,
1253
             "incompatible protocol versions: master %s,"
1254
             " node %s", local_version, remote_version[0])
1255
    if test:
1256
      return False
1257

    
1258
    # node seems compatible, we can actually try to look into its results
1259

    
1260
    # full package version
1261
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1262
                  self.ENODEVERSION, node,
1263
                  "software version mismatch: master %s, node %s",
1264
                  constants.RELEASE_VERSION, remote_version[1],
1265
                  code=self.ETYPE_WARNING)
1266

    
1267
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1268
    if isinstance(hyp_result, dict):
1269
      for hv_name, hv_result in hyp_result.iteritems():
1270
        test = hv_result is not None
1271
        _ErrorIf(test, self.ENODEHV, node,
1272
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1273

    
1274

    
1275
    test = nresult.get(constants.NV_NODESETUP,
1276
                           ["Missing NODESETUP results"])
1277
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1278
             "; ".join(test))
1279

    
1280
    return True
1281

    
1282
  def _VerifyNodeTime(self, ninfo, nresult,
1283
                      nvinfo_starttime, nvinfo_endtime):
1284
    """Check the node time.
1285

1286
    @type ninfo: L{objects.Node}
1287
    @param ninfo: the node to check
1288
    @param nresult: the remote results for the node
1289
    @param nvinfo_starttime: the start time of the RPC call
1290
    @param nvinfo_endtime: the end time of the RPC call
1291

1292
    """
1293
    node = ninfo.name
1294
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1295

    
1296
    ntime = nresult.get(constants.NV_TIME, None)
1297
    try:
1298
      ntime_merged = utils.MergeTime(ntime)
1299
    except (ValueError, TypeError):
1300
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1301
      return
1302

    
1303
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1304
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1305
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1306
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1307
    else:
1308
      ntime_diff = None
1309

    
1310
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1311
             "Node time diverges by at least %s from master node time",
1312
             ntime_diff)
1313

    
1314
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1315
    """Check the node time.
1316

1317
    @type ninfo: L{objects.Node}
1318
    @param ninfo: the node to check
1319
    @param nresult: the remote results for the node
1320
    @param vg_name: the configured VG name
1321

1322
    """
1323
    if vg_name is None:
1324
      return
1325

    
1326
    node = ninfo.name
1327
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1328

    
1329
    # checks vg existence and size > 20G
1330
    vglist = nresult.get(constants.NV_VGLIST, None)
1331
    test = not vglist
1332
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1333
    if not test:
1334
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1335
                                            constants.MIN_VG_SIZE)
1336
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1337

    
1338
    # check pv names
1339
    pvlist = nresult.get(constants.NV_PVLIST, None)
1340
    test = pvlist is None
1341
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1342
    if not test:
1343
      # check that ':' is not present in PV names, since it's a
1344
      # special character for lvcreate (denotes the range of PEs to
1345
      # use on the PV)
1346
      for _, pvname, owner_vg in pvlist:
1347
        test = ":" in pvname
1348
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1349
                 " '%s' of VG '%s'", pvname, owner_vg)
1350

    
1351
  def _VerifyNodeNetwork(self, ninfo, nresult):
1352
    """Check the node time.
1353

1354
    @type ninfo: L{objects.Node}
1355
    @param ninfo: the node to check
1356
    @param nresult: the remote results for the node
1357

1358
    """
1359
    node = ninfo.name
1360
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1361

    
1362
    test = constants.NV_NODELIST not in nresult
1363
    _ErrorIf(test, self.ENODESSH, node,
1364
             "node hasn't returned node ssh connectivity data")
1365
    if not test:
1366
      if nresult[constants.NV_NODELIST]:
1367
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1368
          _ErrorIf(True, self.ENODESSH, node,
1369
                   "ssh communication with node '%s': %s", a_node, a_msg)
1370

    
1371
    test = constants.NV_NODENETTEST not in nresult
1372
    _ErrorIf(test, self.ENODENET, node,
1373
             "node hasn't returned node tcp connectivity data")
1374
    if not test:
1375
      if nresult[constants.NV_NODENETTEST]:
1376
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1377
        for anode in nlist:
1378
          _ErrorIf(True, self.ENODENET, node,
1379
                   "tcp communication with node '%s': %s",
1380
                   anode, nresult[constants.NV_NODENETTEST][anode])
1381

    
1382
    test = constants.NV_MASTERIP not in nresult
1383
    _ErrorIf(test, self.ENODENET, node,
1384
             "node hasn't returned node master IP reachability data")
1385
    if not test:
1386
      if not nresult[constants.NV_MASTERIP]:
1387
        if node == self.master_node:
1388
          msg = "the master node cannot reach the master IP (not configured?)"
1389
        else:
1390
          msg = "cannot reach the master IP"
1391
        _ErrorIf(True, self.ENODENET, node, msg)
1392

    
1393

    
1394
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1395
    """Verify an instance.
1396

1397
    This function checks to see if the required block devices are
1398
    available on the instance's node.
1399

1400
    """
1401
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1402
    node_current = instanceconfig.primary_node
1403

    
1404
    node_vol_should = {}
1405
    instanceconfig.MapLVsByNode(node_vol_should)
1406

    
1407
    for node in node_vol_should:
1408
      n_img = node_image[node]
1409
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1410
        # ignore missing volumes on offline or broken nodes
1411
        continue
1412
      for volume in node_vol_should[node]:
1413
        test = volume not in n_img.volumes
1414
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1415
                 "volume %s missing on node %s", volume, node)
1416

    
1417
    if instanceconfig.admin_up:
1418
      pri_img = node_image[node_current]
1419
      test = instance not in pri_img.instances and not pri_img.offline
1420
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1421
               "instance not running on its primary node %s",
1422
               node_current)
1423

    
1424
    for node, n_img in node_image.items():
1425
      if (not node == node_current):
1426
        test = instance in n_img.instances
1427
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1428
                 "instance should not run on node %s", node)
1429

    
1430
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1431
    """Verify if there are any unknown volumes in the cluster.
1432

1433
    The .os, .swap and backup volumes are ignored. All other volumes are
1434
    reported as unknown.
1435

1436
    """
1437
    for node, n_img in node_image.items():
1438
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1439
        # skip non-healthy nodes
1440
        continue
1441
      for volume in n_img.volumes:
1442
        test = (node not in node_vol_should or
1443
                volume not in node_vol_should[node])
1444
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1445
                      "volume %s is unknown", volume)
1446

    
1447
  def _VerifyOrphanInstances(self, instancelist, node_image):
1448
    """Verify the list of running instances.
1449

1450
    This checks what instances are running but unknown to the cluster.
1451

1452
    """
1453
    for node, n_img in node_image.items():
1454
      for o_inst in n_img.instances:
1455
        test = o_inst not in instancelist
1456
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1457
                      "instance %s on node %s should not exist", o_inst, node)
1458

    
1459
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1460
    """Verify N+1 Memory Resilience.
1461

1462
    Check that if one single node dies we can still start all the
1463
    instances it was primary for.
1464

1465
    """
1466
    for node, n_img in node_image.items():
1467
      # This code checks that every node which is now listed as
1468
      # secondary has enough memory to host all instances it is
1469
      # supposed to should a single other node in the cluster fail.
1470
      # FIXME: not ready for failover to an arbitrary node
1471
      # FIXME: does not support file-backed instances
1472
      # WARNING: we currently take into account down instances as well
1473
      # as up ones, considering that even if they're down someone
1474
      # might want to start them even in the event of a node failure.
1475
      for prinode, instances in n_img.sbp.items():
1476
        needed_mem = 0
1477
        for instance in instances:
1478
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1479
          if bep[constants.BE_AUTO_BALANCE]:
1480
            needed_mem += bep[constants.BE_MEMORY]
1481
        test = n_img.mfree < needed_mem
1482
        self._ErrorIf(test, self.ENODEN1, node,
1483
                      "not enough memory on to accommodate"
1484
                      " failovers should peer node %s fail", prinode)
1485

    
1486
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

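      # record, for each secondary node, the instances it backs per primary
      # node; this is later used by the N+1 memory check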
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # manually override lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
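        # each hook result entry is a (script, status, output) tuple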
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
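      # only the LV's online status (the third field of each payload tuple)
      # matters here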
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
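        # the node reports sizes in bytes; shift by 20 bits to get MiB, the
        # unit used for disk sizes in the configuration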
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    for attr in ["candidate_pool_size",
                 "uid_pool", "add_uids", "remove_uids"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
                                   errors.ECODE_INVAL)

    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
                                       errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
                              (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
                                   errors.ECODE_INVAL)
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      if not isinstance(self.op.osparams, dict):
        raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, osp in self.op.osparams.items():
        if not isinstance(osp, dict):
          raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
                                   errors.ECODE_INVAL)
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs),
                                   errors.ECODE_INVAL)
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

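    # sleep for the estimated remaining sync time, but check at least
    # once a minute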
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

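  # a device is only considered consistent if all of its children are
  # consistent, too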
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants, params in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        valid = True
        variants = None
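        # an OS is valid only if its first entry is valid on every node; the
        # reported variants are the intersection of the variants on all nodes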
        for osl in os_data.values():
2820
          valid = bool(valid and osl and osl[0][1])
2821
          if not valid:
2822
            variants = set()
2823
            break
2824
          if calc_variants:
2825
            node_variants = osl[0][3]
2826
            if variants is None:
2827
              variants = set(node_variants)
2828
            else:
2829
              variants.intersection_update(node_variants)
2830

    
2831
      for field in self.op.output_fields:
2832
        if field == "name":
2833
          val = os_name
2834
        elif field == "valid":
2835
          val = valid
2836
        elif field == "node_status":
2837
          # this is just a copy of the dict
2838
          val = {}
2839
          for node_name, nos_list in os_data.items():
2840
            val[node_name] = nos_list
2841
        elif field == "variants":
2842
          val = list(variants)
2843
        else:
2844
          raise errors.ParameterError(field)
2845
        row.append(val)
2846
      output.append(row)
2847

    
2848
    return output
2849

    
2850

    
2851
class LURemoveNode(LogicalUnit):
2852
  """Logical unit for removing a node.
2853

2854
  """
2855
  HPATH = "node-remove"
2856
  HTYPE = constants.HTYPE_NODE
2857
  _OP_REQP = ["node_name"]
2858

    
2859
  def BuildHooksEnv(self):
2860
    """Build hooks env.
2861

2862
    This doesn't run on the target node in the pre phase as a failed
2863
    node would then be impossible to remove.
2864

2865
    """
2866
    env = {
2867
      "OP_TARGET": self.op.node_name,
2868
      "NODE_NAME": self.op.node_name,
2869
      }
2870
    all_nodes = self.cfg.GetNodeList()
2871
    try:
2872
      all_nodes.remove(self.op.node_name)
2873
    except ValueError:
2874
      logging.warning("Node %s which is about to be removed not found"
2875
                      " in the all nodes list", self.op.node_name)
2876
    return env, all_nodes, all_nodes
2877

    
2878
  def CheckPrereq(self):
2879
    """Check prerequisites.
2880

2881
    This checks:
2882
     - the node exists in the configuration
2883
     - it does not have primary or secondary instances
2884
     - it's not the master
2885

2886
    Any errors are signaled by raising errors.OpPrereqError.
2887

2888
    """
2889
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2890
    node = self.cfg.GetNodeInfo(self.op.node_name)
2891
    assert node is not None
2892

    
2893
    instance_list = self.cfg.GetInstanceList()
2894

    
2895
    masternode = self.cfg.GetMasterNode()
2896
    if node.name == masternode:
2897
      raise errors.OpPrereqError("Node is the master node,"
2898
                                 " you need to failover first.",
2899
                                 errors.ECODE_INVAL)
2900

    
2901
    for instance_name in instance_list:
2902
      instance = self.cfg.GetInstanceInfo(instance_name)
2903
      if node.name in instance.all_nodes:
2904
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2905
                                   " please remove first." % instance_name,
2906
                                   errors.ECODE_INVAL)
2907
    self.op.node_name = node.name
2908
    self.node = node
2909

    
2910
  def Exec(self, feedback_fn):
2911
    """Removes the node from the cluster.
2912

2913
    """
2914
    node = self.node
2915
    logging.info("Stopping the node daemon and removing configs from node %s",
2916
                 node.name)
2917

    
2918
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2919

    
2920
    # Promote nodes to master candidate as needed
2921
    _AdjustCandidatePool(self, exceptions=[node.name])
2922
    self.context.RemoveNode(node.name)
2923

    
2924
    # Run post hooks on the node before it's removed
2925
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2926
    try:
2927
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2928
    except:
2929
      # pylint: disable-msg=W0702
2930
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2931

    
2932
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2933
    msg = result.fail_msg
2934
    if msg:
2935
      self.LogWarning("Errors encountered on the remote node while leaving"
2936
                      " the cluster: %s", msg)
2937

    
2938
    # Remove node from our /etc/hosts
2939
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2940
      # FIXME: this should be done via an rpc call to node daemon
2941
      utils.RemoveHostFromEtcHosts(node.name)
2942
      _RedistributeAncillaryFiles(self)
2943

    
2944

    
2945
class LUQueryNodes(NoHooksLU):
2946
  """Logical unit for querying nodes.
2947

2948
  """
2949
  # pylint: disable-msg=W0142
2950
  _OP_REQP = ["output_fields", "names", "use_locking"]
2951
  REQ_BGL = False
2952

    
2953
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2954
                    "master_candidate", "offline", "drained"]
2955

    
2956
  _FIELDS_DYNAMIC = utils.FieldSet(
2957
    "dtotal", "dfree",
2958
    "mtotal", "mnode", "mfree",
2959
    "bootid",
2960
    "ctotal", "cnodes", "csockets",
2961
    )
2962

    
2963
  _FIELDS_STATIC = utils.FieldSet(*[
2964
    "pinst_cnt", "sinst_cnt",
2965
    "pinst_list", "sinst_list",
2966
    "pip", "sip", "tags",
2967
    "master",
2968
    "role"] + _SIMPLE_FIELDS
2969
    )
2970

    
2971
  def ExpandNames(self):
2972
    _CheckOutputFields(static=self._FIELDS_STATIC,
2973
                       dynamic=self._FIELDS_DYNAMIC,
2974
                       selected=self.op.output_fields)
2975

    
2976
    self.needed_locks = {}
2977
    self.share_locks[locking.LEVEL_NODE] = 1
2978

    
2979
    if self.op.names:
2980
      self.wanted = _GetWantedNodes(self, self.op.names)
2981
    else:
2982
      self.wanted = locking.ALL_SET
2983

    
2984
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2985
    self.do_locking = self.do_node_query and self.op.use_locking
2986
    if self.do_locking:
2987
      # if we don't request only static fields, we need to lock the nodes
2988
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2989

    
2990
  def CheckPrereq(self):
2991
    """Check prerequisites.
2992

2993
    """
2994
    # The validation of the node list is done in _GetWantedNodes if the
    # list is not empty; an empty list needs no validation
    pass
2997

    
2998
  def Exec(self, feedback_fn):
2999
    """Computes the list of nodes and their attributes.
3000

3001
    """
3002
    all_info = self.cfg.GetAllNodesInfo()
3003
    if self.do_locking:
3004
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3005
    elif self.wanted != locking.ALL_SET:
3006
      nodenames = self.wanted
3007
      missing = set(nodenames).difference(all_info.keys())
3008
      if missing:
3009
        raise errors.OpExecError(
3010
          "Some nodes were removed before retrieving their data: %s" % missing)
3011
    else:
3012
      nodenames = all_info.keys()
3013

    
3014
    nodenames = utils.NiceSort(nodenames)
3015
    nodelist = [all_info[name] for name in nodenames]
3016

    
3017
    # begin data gathering
3018

    
3019
    if self.do_node_query:
3020
      live_data = {}
3021
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3022
                                          self.cfg.GetHypervisorType())
3023
      for name in nodenames:
3024
        nodeinfo = node_data[name]
3025
        if not nodeinfo.fail_msg and nodeinfo.payload:
3026
          nodeinfo = nodeinfo.payload
3027
          fn = utils.TryConvert
3028
          live_data[name] = {
3029
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3030
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3031
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
3032
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3033
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
3034
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3035
            "bootid": nodeinfo.get('bootid', None),
3036
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3037
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3038
            }
3039
        else:
3040
          live_data[name] = {}
3041
    else:
3042
      live_data = dict.fromkeys(nodenames, {})
3043

    
3044
    node_to_primary = dict([(name, set()) for name in nodenames])
3045
    node_to_secondary = dict([(name, set()) for name in nodenames])
3046

    
3047
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
3048
                             "sinst_cnt", "sinst_list"))
3049
    if inst_fields & frozenset(self.op.output_fields):
3050
      inst_data = self.cfg.GetAllInstancesInfo()
3051

    
3052
      for inst in inst_data.values():
3053
        if inst.primary_node in node_to_primary:
3054
          node_to_primary[inst.primary_node].add(inst.name)
3055
        for secnode in inst.secondary_nodes:
3056
          if secnode in node_to_secondary:
3057
            node_to_secondary[secnode].add(inst.name)
3058

    
3059
    master_node = self.cfg.GetMasterNode()
3060

    
3061
    # end data gathering
3062

    
3063
    output = []
3064
    for node in nodelist:
3065
      node_output = []
3066
      for field in self.op.output_fields:
3067
        if field in self._SIMPLE_FIELDS:
3068
          val = getattr(node, field)
3069
        elif field == "pinst_list":
3070
          val = list(node_to_primary[node.name])
3071
        elif field == "sinst_list":
3072
          val = list(node_to_secondary[node.name])
3073
        elif field == "pinst_cnt":
3074
          val = len(node_to_primary[node.name])
3075
        elif field == "sinst_cnt":
3076
          val = len(node_to_secondary[node.name])
3077
        elif field == "pip":
3078
          val = node.primary_ip
3079
        elif field == "sip":
3080
          val = node.secondary_ip
3081
        elif field == "tags":
3082
          val = list(node.GetTags())
3083
        elif field == "master":
3084
          val = node.name == master_node
3085
        elif self._FIELDS_DYNAMIC.Matches(field):
3086
          val = live_data[node.name].get(field, None)
3087
        elif field == "role":
3088
          if node.name == master_node:
3089
            val = "M"
3090
          elif node.master_candidate:
3091
            val = "C"
3092
          elif node.drained:
3093
            val = "D"
3094
          elif node.offline:
3095
            val = "O"
3096
          else:
3097
            val = "R"
3098
        else:
3099
          raise errors.ParameterError(field)
3100
        node_output.append(val)
3101
      output.append(node_output)
3102

    
3103
    return output
3104

    
3105

    
3106
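# Illustrative sketch (not part of the original module): deriving the
# single-letter node role the same way the "role" output field is
# computed in LUQueryNodes.Exec above. The attribute names mirror the
# node objects used there; the helper itself is hypothetical.
def _ExampleNodeRole(node, master_node):
  if node.name == master_node:
    return "M"           # master
  elif node.master_candidate:
    return "C"           # master candidate
  elif node.drained:
    return "D"           # drained
  elif node.offline:
    return "O"           # offline
  else:
    return "R"           # regular
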
class LUQueryNodeVolumes(NoHooksLU):
3107
  """Logical unit for getting volumes on node(s).
3108

3109
  """
3110
  _OP_REQP = ["nodes", "output_fields"]
3111
  REQ_BGL = False
3112
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3113
  _FIELDS_STATIC = utils.FieldSet("node")
3114

    
3115
  def ExpandNames(self):
3116
    _CheckOutputFields(static=self._FIELDS_STATIC,
3117
                       dynamic=self._FIELDS_DYNAMIC,
3118
                       selected=self.op.output_fields)
3119

    
3120
    self.needed_locks = {}
3121
    self.share_locks[locking.LEVEL_NODE] = 1
3122
    if not self.op.nodes:
3123
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3124
    else:
3125
      self.needed_locks[locking.LEVEL_NODE] = \
3126
        _GetWantedNodes(self, self.op.nodes)
3127

    
3128
  def CheckPrereq(self):
3129
    """Check prerequisites.
3130

3131
    This checks that the fields required are valid output fields.
3132

3133
    """
3134
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3135

    
3136
  def Exec(self, feedback_fn):
3137
    """Computes the list of nodes and their attributes.
3138

3139
    """
3140
    nodenames = self.nodes
3141
    volumes = self.rpc.call_node_volumes(nodenames)
3142

    
3143
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3144
             in self.cfg.GetInstanceList()]
3145

    
3146
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3147

    
3148
    output = []
3149
    for node in nodenames:
3150
      nresult = volumes[node]
3151
      if nresult.offline:
3152
        continue
3153
      msg = nresult.fail_msg
3154
      if msg:
3155
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3156
        continue
3157

    
3158
      node_vols = nresult.payload[:]
3159
      node_vols.sort(key=lambda vol: vol['dev'])
3160

    
3161
      for vol in node_vols:
3162
        node_output = []
3163
        for field in self.op.output_fields:
3164
          if field == "node":
3165
            val = node
3166
          elif field == "phys":
3167
            val = vol['dev']
3168
          elif field == "vg":
3169
            val = vol['vg']
3170
          elif field == "name":
3171
            val = vol['name']
3172
          elif field == "size":
3173
            val = int(float(vol['size']))
3174
          elif field == "instance":
3175
            for inst in ilist:
3176
              if node not in lv_by_node[inst]:
3177
                continue
3178
              if vol['name'] in lv_by_node[inst][node]:
3179
                val = inst.name
3180
                break
3181
            else:
3182
              val = '-'
3183
          else:
3184
            raise errors.ParameterError(field)
3185
          node_output.append(str(val))
3186

    
3187
        output.append(node_output)
3188

    
3189
    return output
3190

    
3191

    
3192
class LUQueryNodeStorage(NoHooksLU):
3193
  """Logical unit for getting information on storage units on node(s).
3194

3195
  """
3196
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
3197
  REQ_BGL = False
3198
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3199

    
3200
  def CheckArguments(self):
3201
    _CheckStorageType(self.op.storage_type)
3202

    
3203
    _CheckOutputFields(static=self._FIELDS_STATIC,
3204
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3205
                       selected=self.op.output_fields)
3206

    
3207
  def ExpandNames(self):
3208
    self.needed_locks = {}
3209
    self.share_locks[locking.LEVEL_NODE] = 1
3210

    
3211
    if self.op.nodes:
3212
      self.needed_locks[locking.LEVEL_NODE] = \
3213
        _GetWantedNodes(self, self.op.nodes)
3214
    else:
3215
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3216

    
3217
  def CheckPrereq(self):
3218
    """Check prerequisites.
3219

3220
    This checks that the fields required are valid output fields.
3221

3222
    """
3223
    self.op.name = getattr(self.op, "name", None)
3224

    
3225
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3226

    
3227
  def Exec(self, feedback_fn):
3228
    """Computes the list of nodes and their attributes.
3229

3230
    """
3231
    # Always get name to sort by
3232
    if constants.SF_NAME in self.op.output_fields:
3233
      fields = self.op.output_fields[:]
3234
    else:
3235
      fields = [constants.SF_NAME] + self.op.output_fields
3236

    
3237
    # Never ask for node or type as it's only known to the LU
3238
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3239
      while extra in fields:
3240
        fields.remove(extra)
3241

    
3242
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3243
    name_idx = field_idx[constants.SF_NAME]
3244

    
3245
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3246
    data = self.rpc.call_storage_list(self.nodes,
3247
                                      self.op.storage_type, st_args,
3248
                                      self.op.name, fields)
3249

    
3250
    result = []
3251

    
3252
    for node in utils.NiceSort(self.nodes):
3253
      nresult = data[node]
3254
      if nresult.offline:
3255
        continue
3256

    
3257
      msg = nresult.fail_msg
3258
      if msg:
3259
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3260
        continue
3261

    
3262
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3263

    
3264
      for name in utils.NiceSort(rows.keys()):
3265
        row = rows[name]
3266

    
3267
        out = []
3268

    
3269
        for field in self.op.output_fields:
3270
          if field == constants.SF_NODE:
3271
            val = node
3272
          elif field == constants.SF_TYPE:
3273
            val = self.op.storage_type
3274
          elif field in field_idx:
3275
            val = row[field_idx[field]]
3276
          else:
3277
            raise errors.ParameterError(field)
3278

    
3279
          out.append(val)
3280

    
3281
        result.append(out)
3282

    
3283
    return result
3284

    
3285

    
3286
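# Illustrative sketch (not part of the original module): the field
# bookkeeping used by LUQueryNodeStorage.Exec above, shown with plain
# strings instead of the constants module; "name", "node" and "type"
# stand in for constants.SF_NAME/SF_NODE/SF_TYPE, and the helper is
# hypothetical.
def _ExampleStorageFields(output_fields):
  # always fetch the name so the rows can be sorted by it
  if "name" in output_fields:
    fields = output_fields[:]
  else:
    fields = ["name"] + output_fields
  # node and type are filled in locally, never requested remotely
  for extra in ["node", "type"]:
    while extra in fields:
      fields.remove(extra)
  field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
  return fields, field_idx

# Example: _ExampleStorageFields(["node", "size", "name"])
#   -> (["size", "name"], {"size": 0, "name": 1})
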
class LUModifyNodeStorage(NoHooksLU):
3287
  """Logical unit for modifying a storage volume on a node.
3288

3289
  """
3290
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3291
  REQ_BGL = False
3292

    
3293
  def CheckArguments(self):
3294
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3295

    
3296
    _CheckStorageType(self.op.storage_type)
3297

    
3298
  def ExpandNames(self):
3299
    self.needed_locks = {
3300
      locking.LEVEL_NODE: self.op.node_name,
3301
      }
3302

    
3303
  def CheckPrereq(self):
3304
    """Check prerequisites.
3305

3306
    """
3307
    storage_type = self.op.storage_type
3308

    
3309
    try:
3310
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3311
    except KeyError:
3312
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3313
                                 " modified" % storage_type,
3314
                                 errors.ECODE_INVAL)
3315

    
3316
    diff = set(self.op.changes.keys()) - modifiable
3317
    if diff:
3318
      raise errors.OpPrereqError("The following fields can not be modified for"
3319
                                 " storage units of type '%s': %r" %
3320
                                 (storage_type, list(diff)),
3321
                                 errors.ECODE_INVAL)
3322

    
3323
  def Exec(self, feedback_fn):
3324
    """Computes the list of nodes and their attributes.
3325

3326
    """
3327
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3328
    result = self.rpc.call_storage_modify(self.op.node_name,
3329
                                          self.op.storage_type, st_args,
3330
                                          self.op.name, self.op.changes)
3331
    result.Raise("Failed to modify storage unit '%s' on %s" %
3332
                 (self.op.name, self.op.node_name))
3333

    
3334

    
3335
class LUAddNode(LogicalUnit):
3336
  """Logical unit for adding node to the cluster.
3337

3338
  """
3339
  HPATH = "node-add"
3340
  HTYPE = constants.HTYPE_NODE
3341
  _OP_REQP = ["node_name"]
3342

    
3343
  def CheckArguments(self):
3344
    # validate/normalize the node name
3345
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3346

    
3347
  def BuildHooksEnv(self):
3348
    """Build hooks env.
3349

3350
    This will run on all nodes before, and on all nodes + the new node after.
3351

3352
    """
3353
    env = {
3354
      "OP_TARGET": self.op.node_name,
3355
      "NODE_NAME": self.op.node_name,
3356
      "NODE_PIP": self.op.primary_ip,
3357
      "NODE_SIP": self.op.secondary_ip,
3358
      }
3359
    nodes_0 = self.cfg.GetNodeList()
3360
    nodes_1 = nodes_0 + [self.op.node_name, ]
3361
    return env, nodes_0, nodes_1
3362

    
3363
  def CheckPrereq(self):
3364
    """Check prerequisites.
3365

3366
    This checks:
3367
     - the new node is not already in the config
3368
     - it is resolvable
3369
     - its parameters (single/dual homed) matches the cluster
3370

3371
    Any errors are signaled by raising errors.OpPrereqError.
3372

3373
    """
3374
    node_name = self.op.node_name
3375
    cfg = self.cfg
3376

    
3377
    dns_data = utils.GetHostInfo(node_name)
3378

    
3379
    node = dns_data.name
3380
    primary_ip = self.op.primary_ip = dns_data.ip
3381
    secondary_ip = getattr(self.op, "secondary_ip", None)
3382
    if secondary_ip is None:
3383
      secondary_ip = primary_ip
3384
    if not utils.IsValidIP(secondary_ip):
3385
      raise errors.OpPrereqError("Invalid secondary IP given",
3386
                                 errors.ECODE_INVAL)
3387
    self.op.secondary_ip = secondary_ip
3388

    
3389
    node_list = cfg.GetNodeList()
3390
    if not self.op.readd and node in node_list:
3391
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3392
                                 node, errors.ECODE_EXISTS)
3393
    elif self.op.readd and node not in node_list:
3394
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3395
                                 errors.ECODE_NOENT)
3396

    
3397
    self.changed_primary_ip = False
3398

    
3399
    for existing_node_name in node_list:
3400
      existing_node = cfg.GetNodeInfo(existing_node_name)
3401

    
3402
      if self.op.readd and node == existing_node_name:
3403
        if existing_node.secondary_ip != secondary_ip:
3404
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3405
                                     " address configuration as before",
3406
                                     errors.ECODE_INVAL)
3407
        if existing_node.primary_ip != primary_ip:
3408
          self.changed_primary_ip = True
3409

    
3410
        continue
3411

    
3412
      if (existing_node.primary_ip == primary_ip or
3413
          existing_node.secondary_ip == primary_ip or
3414
          existing_node.primary_ip == secondary_ip or
3415
          existing_node.secondary_ip == secondary_ip):
3416
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3417
                                   " existing node %s" % existing_node.name,
3418
                                   errors.ECODE_NOTUNIQUE)
3419

    
3420
    # check that the type of the node (single versus dual homed) is the
3421
    # same as for the master
3422
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3423
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3424
    newbie_singlehomed = secondary_ip == primary_ip
3425
    if master_singlehomed != newbie_singlehomed:
3426
      if master_singlehomed:
3427
        raise errors.OpPrereqError("The master has no private ip but the"
3428
                                   " new node has one",
3429
                                   errors.ECODE_INVAL)
3430
      else:
3431
        raise errors.OpPrereqError("The master has a private ip but the"
3432
                                   " new node doesn't have one",
3433
                                   errors.ECODE_INVAL)
3434

    
3435
    # checks reachability
3436
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3437
      raise errors.OpPrereqError("Node not reachable by ping",
3438
                                 errors.ECODE_ENVIRON)
3439

    
3440
    if not newbie_singlehomed:
3441
      # check reachability from my secondary ip to newbie's secondary ip
3442
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3443
                           source=myself.secondary_ip):
3444
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3445
                                   " based ping to noded port",
3446
                                   errors.ECODE_ENVIRON)
3447

    
3448
    if self.op.readd:
3449
      exceptions = [node]
3450
    else:
3451
      exceptions = []
3452

    
3453
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3454

    
3455
    if self.op.readd:
3456
      self.new_node = self.cfg.GetNodeInfo(node)
3457
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3458
    else:
3459
      self.new_node = objects.Node(name=node,
3460
                                   primary_ip=primary_ip,
3461
                                   secondary_ip=secondary_ip,
3462
                                   master_candidate=self.master_candidate,
3463
                                   offline=False, drained=False)
3464

    
3465
  def Exec(self, feedback_fn):
3466
    """Adds the new node to the cluster.
3467

3468
    """
3469
    new_node = self.new_node
3470
    node = new_node.name
3471

    
3472
    # for re-adds, reset the offline/drained/master-candidate flags;
3473
    # we need to reset here, otherwise offline would prevent RPC calls
3474
    # later in the procedure; this also means that if the re-add
3475
    # fails, we are left with a non-offlined, broken node
3476
    if self.op.readd:
3477
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3478
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3479
      # if we demote the node, we do cleanup later in the procedure
3480
      new_node.master_candidate = self.master_candidate
3481
      if self.changed_primary_ip:
3482
        new_node.primary_ip = self.op.primary_ip
3483

    
3484
    # notify the user about any possible mc promotion
3485
    if new_node.master_candidate:
3486
      self.LogInfo("Node will be a master candidate")
3487

    
3488
    # check connectivity
3489
    result = self.rpc.call_version([node])[node]
3490
    result.Raise("Can't get version information from node %s" % node)
3491
    if constants.PROTOCOL_VERSION == result.payload:
3492
      logging.info("Communication to node %s fine, sw version %s match",
3493
                   node, result.payload)
3494
    else:
3495
      raise errors.OpExecError("Version mismatch master version %s,"
3496
                               " node version %s" %
3497
                               (constants.PROTOCOL_VERSION, result.payload))
3498

    
3499
    # setup ssh on node
3500
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3501
      logging.info("Copy ssh key to node %s", node)
3502
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3503
      keyarray = []
3504
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3505
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3506
                  priv_key, pub_key]
3507

    
3508
      for i in keyfiles:
3509
        keyarray.append(utils.ReadFile(i))
3510

    
3511
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3512
                                      keyarray[2], keyarray[3], keyarray[4],
3513
                                      keyarray[5])
3514
      result.Raise("Cannot transfer ssh keys to the new node")
3515

    
3516
    # Add node to our /etc/hosts, and add key to known_hosts
3517
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3518
      # FIXME: this should be done via an rpc call to node daemon
3519
      utils.AddHostToEtcHosts(new_node.name)
3520

    
3521
    if new_node.secondary_ip != new_node.primary_ip:
3522
      result = self.rpc.call_node_has_ip_address(new_node.name,
3523
                                                 new_node.secondary_ip)
3524
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3525
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3526
      if not result.payload:
3527
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3528
                                 " you gave (%s). Please fix and re-run this"
3529
                                 " command." % new_node.secondary_ip)
3530

    
3531
    node_verify_list = [self.cfg.GetMasterNode()]
3532
    node_verify_param = {
3533
      constants.NV_NODELIST: [node],
3534
      # TODO: do a node-net-test as well?
3535
    }
3536

    
3537
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3538
                                       self.cfg.GetClusterName())
3539
    for verifier in node_verify_list:
3540
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3541
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3542
      if nl_payload:
3543
        for failed in nl_payload:
3544
          feedback_fn("ssh/hostname verification failed"
3545
                      " (checking from %s): %s" %
3546
                      (verifier, nl_payload[failed]))
3547
        raise errors.OpExecError("ssh/hostname verification failed.")
3548

    
3549
    if self.op.readd:
3550
      _RedistributeAncillaryFiles(self)
3551
      self.context.ReaddNode(new_node)
3552
      # make sure we redistribute the config
3553
      self.cfg.Update(new_node, feedback_fn)
3554
      # and make sure the new node will not have old files around
3555
      if not new_node.master_candidate:
3556
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3557
        msg = result.fail_msg
3558
        if msg:
3559
          self.LogWarning("Node failed to demote itself from master"
3560
                          " candidate status: %s" % msg)
3561
    else:
3562
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3563
      self.context.AddNode(new_node, self.proc.GetECId())
3564

    
3565

    
3566
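# Illustrative sketch (not part of the original module): the
# single-homed/dual-homed consistency check performed by
# LUAddNode.CheckPrereq above, written as a standalone helper for
# clarity. The helper name and its plain-string return value are
# hypothetical.
def _ExampleCheckHoming(master_primary_ip, master_secondary_ip,
                        new_primary_ip, new_secondary_ip):
  master_singlehomed = master_secondary_ip == master_primary_ip
  newbie_singlehomed = new_secondary_ip == new_primary_ip
  if master_singlehomed == newbie_singlehomed:
    return None
  if master_singlehomed:
    return "The master has no private ip but the new node has one"
  return "The master has a private ip but the new node doesn't have one"
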
class LUSetNodeParams(LogicalUnit):
3567
  """Modifies the parameters of a node.
3568

3569
  """
3570
  HPATH = "node-modify"
3571
  HTYPE = constants.HTYPE_NODE
3572
  _OP_REQP = ["node_name"]
3573
  REQ_BGL = False
3574

    
3575
  def CheckArguments(self):
3576
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3577
    _CheckBooleanOpField(self.op, 'master_candidate')
3578
    _CheckBooleanOpField(self.op, 'offline')
3579
    _CheckBooleanOpField(self.op, 'drained')
3580
    _CheckBooleanOpField(self.op, 'auto_promote')
3581
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3582
    if all_mods.count(None) == 3:
3583
      raise errors.OpPrereqError("Please pass at least one modification",
3584
                                 errors.ECODE_INVAL)
3585
    if all_mods.count(True) > 1:
3586
      raise errors.OpPrereqError("Can't set the node into more than one"
3587
                                 " state at the same time",
3588
                                 errors.ECODE_INVAL)
3589

    
3590
    # Boolean value that tells us whether we're offlining or draining the node
3591
    self.offline_or_drain = (self.op.offline == True or
3592
                             self.op.drained == True)
3593
    self.deoffline_or_drain = (self.op.offline == False or
3594
                               self.op.drained == False)
3595
    self.might_demote = (self.op.master_candidate == False or
3596
                         self.offline_or_drain)
3597

    
3598
    self.lock_all = self.op.auto_promote and self.might_demote
3599

    
3600

    
3601
  def ExpandNames(self):
3602
    if self.lock_all:
3603
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3604
    else:
3605
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3606

    
3607
  def BuildHooksEnv(self):
3608
    """Build hooks env.
3609

3610
    This runs on the master node.
3611

3612
    """
3613
    env = {
3614
      "OP_TARGET": self.op.node_name,
3615
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3616
      "OFFLINE": str(self.op.offline),
3617
      "DRAINED": str(self.op.drained),
3618
      }
3619
    nl = [self.cfg.GetMasterNode(),
3620
          self.op.node_name]
3621
    return env, nl, nl
3622

    
3623
  def CheckPrereq(self):
3624
    """Check prerequisites.
3625

3626
    This checks that the requested role changes are valid for the
    current node and cluster state.
3627

3628
    """
3629
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3630

    
3631
    if (self.op.master_candidate is not None or
3632
        self.op.drained is not None or
3633
        self.op.offline is not None):
3634
      # we can't change the master's node flags
3635
      if self.op.node_name == self.cfg.GetMasterNode():
3636
        raise errors.OpPrereqError("The master role can be changed"
3637
                                   " only via masterfailover",
3638
                                   errors.ECODE_INVAL)
3639

    
3640

    
3641
    if node.master_candidate and self.might_demote and not self.lock_all:
3642
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3643
      # check if after removing the current node, we're missing master
3644
      # candidates
3645
      (mc_remaining, mc_should, _) = \
3646
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3647
      if mc_remaining < mc_should:
3648
        raise errors.OpPrereqError("Not enough master candidates, please"
3649
                                   " pass auto_promote to allow promotion",
3650
                                   errors.ECODE_INVAL)
3651

    
3652
    if (self.op.master_candidate == True and
3653
        ((node.offline and not self.op.offline == False) or
3654
         (node.drained and not self.op.drained == False))):
3655
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3656
                                 " to master_candidate" % node.name,
3657
                                 errors.ECODE_INVAL)
3658

    
3659
    # If we're being deofflined/drained, we'll MC ourself if needed
3660
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3661
        self.op.master_candidate == True and not node.master_candidate):
3662
      self.op.master_candidate = _DecideSelfPromotion(self)
3663
      if self.op.master_candidate:
3664
        self.LogInfo("Autopromoting node to master candidate")
3665

    
3666
    return
3667

    
3668
  def Exec(self, feedback_fn):
3669
    """Modifies a node.
3670

3671
    """
3672
    node = self.node
3673

    
3674
    result = []
3675
    changed_mc = False
3676

    
3677
    if self.op.offline is not None:
3678
      node.offline = self.op.offline
3679
      result.append(("offline", str(self.op.offline)))
3680
      if self.op.offline == True:
3681
        if node.master_candidate:
3682
          node.master_candidate = False
3683
          changed_mc = True
3684
          result.append(("master_candidate", "auto-demotion due to offline"))
3685
        if node.drained:
3686
          node.drained = False
3687
          result.append(("drained", "clear drained status due to offline"))
3688

    
3689
    if self.op.master_candidate is not None:
3690
      node.master_candidate = self.op.master_candidate
3691
      changed_mc = True
3692
      result.append(("master_candidate", str(self.op.master_candidate)))
3693
      if self.op.master_candidate == False:
3694
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3695
        msg = rrc.fail_msg
3696
        if msg:
3697
          self.LogWarning("Node failed to demote itself: %s" % msg)
3698

    
3699
    if self.op.drained is not None:
3700
      node.drained = self.op.drained
3701
      result.append(("drained", str(self.op.drained)))
3702
      if self.op.drained == True:
3703
        if node.master_candidate:
3704
          node.master_candidate = False
3705
          changed_mc = True
3706
          result.append(("master_candidate", "auto-demotion due to drain"))
3707
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3708
          msg = rrc.fail_msg
3709
          if msg:
3710
            self.LogWarning("Node failed to demote itself: %s" % msg)
3711
        if node.offline:
3712
          node.offline = False
3713
          result.append(("offline", "clear offline status due to drain"))
3714

    
3715
    # we locked all nodes, we adjust the CP before updating this node
3716
    if self.lock_all:
3717
      _AdjustCandidatePool(self, [node.name])
3718

    
3719
    # this will trigger configuration file update, if needed
3720
    self.cfg.Update(node, feedback_fn)
3721

    
3722
    # this will trigger job queue propagation or cleanup
3723
    if changed_mc:
3724
      self.context.ReaddNode(node)
3725

    
3726
    return result
3727

    
3728

    
3729
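# Illustrative sketch (not part of the original module): the flag
# validation done in LUSetNodeParams.CheckArguments above, i.e. at
# least one of offline/drained/master_candidate must be given and at
# most one of them may be set to True. The helper is hypothetical.
def _ExampleValidateNodeFlags(offline, master_candidate, drained):
  all_mods = [offline, master_candidate, drained]
  if all_mods.count(None) == 3:
    raise errors.OpPrereqError("Please pass at least one modification",
                               errors.ECODE_INVAL)
  if all_mods.count(True) > 1:
    raise errors.OpPrereqError("Can't set the node into more than one"
                               " state at the same time",
                               errors.ECODE_INVAL)
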
class LUPowercycleNode(NoHooksLU):
3730
  """Powercycles a node.
3731

3732
  """
3733
  _OP_REQP = ["node_name", "force"]
3734
  REQ_BGL = False
3735

    
3736
  def CheckArguments(self):
3737
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3738
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3739
      raise errors.OpPrereqError("The node is the master and the force"
3740
                                 " parameter was not set",
3741
                                 errors.ECODE_INVAL)
3742

    
3743
  def ExpandNames(self):
3744
    """Locking for PowercycleNode.
3745

3746
    This is a last-resort option and shouldn't block on other
3747
    jobs. Therefore, we grab no locks.
3748

3749
    """
3750
    self.needed_locks = {}
3751

    
3752
  def CheckPrereq(self):
3753
    """Check prerequisites.
3754

3755
    This LU has no prereqs.
3756

3757
    """
3758
    pass
3759

    
3760
  def Exec(self, feedback_fn):
3761
    """Reboots a node.
3762

3763
    """
3764
    result = self.rpc.call_node_powercycle(self.op.node_name,
3765
                                           self.cfg.GetHypervisorType())
3766
    result.Raise("Failed to schedule the reboot")
3767
    return result.payload
3768

    
3769

    
3770
class LUQueryClusterInfo(NoHooksLU):
3771
  """Query cluster configuration.
3772

3773
  """
3774
  _OP_REQP = []
3775
  REQ_BGL = False
3776

    
3777
  def ExpandNames(self):
3778
    self.needed_locks = {}
3779

    
3780
  def CheckPrereq(self):
3781
    """No prerequsites needed for this LU.
3782

3783
    """
3784
    pass
3785

    
3786
  def Exec(self, feedback_fn):
3787
    """Return cluster config.
3788

3789
    """
3790
    cluster = self.cfg.GetClusterInfo()
3791
    os_hvp = {}
3792

    
3793
    # Filter just for enabled hypervisors
3794
    for os_name, hv_dict in cluster.os_hvp.items():
3795
      os_hvp[os_name] = {}
3796
      for hv_name, hv_params in hv_dict.items():
3797
        if hv_name in cluster.enabled_hypervisors:
3798
          os_hvp[os_name][hv_name] = hv_params
3799

    
3800
    result = {
3801
      "software_version": constants.RELEASE_VERSION,
3802
      "protocol_version": constants.PROTOCOL_VERSION,
3803
      "config_version": constants.CONFIG_VERSION,
3804
      "os_api_version": max(constants.OS_API_VERSIONS),
3805
      "export_version": constants.EXPORT_VERSION,
3806
      "architecture": (platform.architecture()[0], platform.machine()),
3807
      "name": cluster.cluster_name,
3808
      "master": cluster.master_node,
3809
      "default_hypervisor": cluster.enabled_hypervisors[0],
3810
      "enabled_hypervisors": cluster.enabled_hypervisors,
3811
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3812
                        for hypervisor_name in cluster.enabled_hypervisors]),
3813
      "os_hvp": os_hvp,
3814
      "beparams": cluster.beparams,
3815
      "osparams": cluster.osparams,
3816
      "nicparams": cluster.nicparams,
3817
      "candidate_pool_size": cluster.candidate_pool_size,
3818
      "master_netdev": cluster.master_netdev,
3819
      "volume_group_name": cluster.volume_group_name,
3820
      "file_storage_dir": cluster.file_storage_dir,
3821
      "maintain_node_health": cluster.maintain_node_health,
3822
      "ctime": cluster.ctime,
3823
      "mtime": cluster.mtime,
3824
      "uuid": cluster.uuid,
3825
      "tags": list(cluster.GetTags()),
3826
      "uid_pool": cluster.uid_pool,
3827
      }
3828

    
3829
    return result
3830

    
3831

    
3832
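# Illustrative sketch (not part of the original module): the per-OS
# hypervisor parameter filtering done in LUQueryClusterInfo.Exec above,
# keeping only the hypervisors enabled on the cluster. The helper and
# the sample data in the usage note are hypothetical.
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  filtered = {}
  for os_name, hv_dict in os_hvp.items():
    filtered[os_name] = {}
    for hv_name, hv_params in hv_dict.items():
      if hv_name in enabled_hypervisors:
        filtered[os_name][hv_name] = hv_params
  return filtered

# Example:
#   _ExampleFilterOsHvp({"lenny": {"xen-pvm": {}, "kvm": {}}}, ["kvm"])
#   -> {"lenny": {"kvm": {}}}
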
class LUQueryConfigValues(NoHooksLU):
3833
  """Return configuration values.
3834

3835
  """
3836
  _OP_REQP = []
3837
  REQ_BGL = False
3838
  _FIELDS_DYNAMIC = utils.FieldSet()
3839
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3840
                                  "watcher_pause")
3841

    
3842
  def ExpandNames(self):
3843
    self.needed_locks = {}
3844

    
3845
    _CheckOutputFields(static=self._FIELDS_STATIC,
3846
                       dynamic=self._FIELDS_DYNAMIC,
3847
                       selected=self.op.output_fields)
3848

    
3849
  def CheckPrereq(self):
3850
    """No prerequisites.
3851

3852
    """
3853
    pass
3854

    
3855
  def Exec(self, feedback_fn):
3856
    """Dump a representation of the cluster config to the standard output.
3857

3858
    """
3859
    values = []
3860
    for field in self.op.output_fields:
3861
      if field == "cluster_name":
3862
        entry = self.cfg.GetClusterName()
3863
      elif field == "master_node":
3864
        entry = self.cfg.GetMasterNode()
3865
      elif field == "drain_flag":
3866
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3867
      elif field == "watcher_pause":
3868
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3869
      else:
3870
        raise errors.ParameterError(field)
3871
      values.append(entry)
3872
    return values
3873

    
3874

    
3875
class LUActivateInstanceDisks(NoHooksLU):
3876
  """Bring up an instance's disks.
3877

3878
  """
3879
  _OP_REQP = ["instance_name"]
3880
  REQ_BGL = False
3881

    
3882
  def ExpandNames(self):
3883
    self._ExpandAndLockInstance()
3884
    self.needed_locks[locking.LEVEL_NODE] = []
3885
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3886

    
3887
  def DeclareLocks(self, level):
3888
    if level == locking.LEVEL_NODE:
3889
      self._LockInstancesNodes()
3890

    
3891
  def CheckPrereq(self):
3892
    """Check prerequisites.
3893

3894
    This checks that the instance is in the cluster.
3895

3896
    """
3897
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3898
    assert self.instance is not None, \
3899
      "Cannot retrieve locked instance %s" % self.op.instance_name
3900
    _CheckNodeOnline(self, self.instance.primary_node)
3901
    if not hasattr(self.op, "ignore_size"):
3902
      self.op.ignore_size = False
3903

    
3904
  def Exec(self, feedback_fn):
3905
    """Activate the disks.
3906

3907
    """
3908
    disks_ok, disks_info = \
3909
              _AssembleInstanceDisks(self, self.instance,
3910
                                     ignore_size=self.op.ignore_size)
3911
    if not disks_ok:
3912
      raise errors.OpExecError("Cannot activate block devices")
3913

    
3914
    return disks_info
3915

    
3916

    
3917
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3918
                           ignore_size=False):
3919
  """Prepare the block devices for an instance.
3920

3921
  This sets up the block devices on all nodes.
3922

3923
  @type lu: L{LogicalUnit}
3924
  @param lu: the logical unit on whose behalf we execute
3925
  @type instance: L{objects.Instance}
3926
  @param instance: the instance for whose disks we assemble
3927
  @type disks: list of L{objects.Disk} or None
3928
  @param disks: which disks to assemble (or all, if None)
3929
  @type ignore_secondaries: boolean
3930
  @param ignore_secondaries: if true, errors on secondary nodes
3931
      won't result in an error return from the function
3932
  @type ignore_size: boolean
3933
  @param ignore_size: if true, the current known size of the disk
3934
      will not be used during the disk activation, useful for cases
3935
      when the size is wrong
3936
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices
3939

3940
  """
3941
  device_info = []
3942
  disks_ok = True
3943
  iname = instance.name
3944
  disks = _ExpandCheckDisks(instance, disks)
3945

    
3946
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
3949

    
3950
  # The proper fix would be to wait (with some limits) until the
3951
  # connection has been made and drbd transitions from WFConnection
3952
  # into any other network-connected state (Connected, SyncTarget,
3953
  # SyncSource, etc.)
3954

    
3955
  # 1st pass, assemble on all nodes in secondary mode
3956
  for inst_disk in disks:
3957
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3958
      if ignore_size:
3959
        node_disk = node_disk.Copy()
3960
        node_disk.UnsetSize()
3961
      lu.cfg.SetDiskID(node_disk, node)
3962
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3963
      msg = result.fail_msg
3964
      if msg:
3965
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3966
                           " (is_primary=False, pass=1): %s",
3967
                           inst_disk.iv_name, node, msg)
3968
        if not ignore_secondaries:
3969
          disks_ok = False
3970

    
3971
  # FIXME: race condition on drbd migration to primary
3972

    
3973
  # 2nd pass, do only the primary node
3974
  for inst_disk in disks:
3975
    dev_path = None
3976

    
3977
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3978
      if node != instance.primary_node:
3979
        continue
3980
      if ignore_size:
3981
        node_disk = node_disk.Copy()
3982
        node_disk.UnsetSize()
3983
      lu.cfg.SetDiskID(node_disk, node)
3984
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3985
      msg = result.fail_msg
3986
      if msg:
3987
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3988
                           " (is_primary=True, pass=2): %s",
3989
                           inst_disk.iv_name, node, msg)
3990
        disks_ok = False
3991
      else:
3992
        dev_path = result.payload
3993

    
3994
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3995

    
3996
  # leave the disks configured for the primary node
3997
  # this is a workaround that would be fixed better by
3998
  # improving the logical/physical id handling
3999
  for disk in disks:
4000
    lu.cfg.SetDiskID(disk, instance.primary_node)
4001

    
4002
  return disks_ok, device_info
4003

    
4004

    
4005
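# Illustrative sketch (not part of the original module): the two-pass
# activation pattern used by _AssembleInstanceDisks above, reduced to
# its skeleton. assemble_fn is a hypothetical stand-in for the per-node
# RPC call (call_blockdev_assemble) and is assumed to return an error
# message or None.
def _ExampleTwoPassAssemble(nodes, primary_node, assemble_fn):
  ok = True
  # 1st pass: bring the device up on every node in secondary mode
  for node in nodes:
    if assemble_fn(node, False):
      ok = False
  # 2nd pass: only now switch the device to primary on the primary node
  if assemble_fn(primary_node, True):
    ok = False
  return ok
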
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")

    
4019

    
4020
class LUDeactivateInstanceDisks(NoHooksLU):
4021
  """Shutdown an instance's disks.
4022

4023
  """
4024
  _OP_REQP = ["instance_name"]
4025
  REQ_BGL = False
4026

    
4027
  def ExpandNames(self):
4028
    self._ExpandAndLockInstance()
4029
    self.needed_locks[locking.LEVEL_NODE] = []
4030
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4031

    
4032
  def DeclareLocks(self, level):
4033
    if level == locking.LEVEL_NODE:
4034
      self._LockInstancesNodes()
4035

    
4036
  def CheckPrereq(self):
4037
    """Check prerequisites.
4038

4039
    This checks that the instance is in the cluster.
4040

4041
    """
4042
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4043
    assert self.instance is not None, \
4044
      "Cannot retrieve locked instance %s" % self.op.instance_name
4045

    
4046
  def Exec(self, feedback_fn):
4047
    """Deactivate the disks
4048

4049
    """
4050
    instance = self.instance
4051
    _SafeShutdownInstanceDisks(self, instance)
4052

    
4053

    
4054
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4055
  """Shutdown block devices of an instance.
4056

4057
  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.
4059

4060
  """
4061
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4062
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4063

    
4064

    
4065
def _ExpandCheckDisks(instance, disks):
4066
  """Return the instance disks selected by the disks list
4067

4068
  @type disks: list of L{objects.Disk} or None
4069
  @param disks: selected disks
4070
  @rtype: list of L{objects.Disk}
4071
  @return: selected instance disks to act on
4072

4073
  """
4074
  if disks is None:
4075
    return instance.disks
4076
  else:
4077
    if not set(disks).issubset(instance.disks):
4078
      raise errors.ProgrammerError("Can only act on disks belonging to the"
4079
                                   " target instance")
4080
    return disks
4081

    
4082

    
4083
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4084
  """Shutdown block devices of an instance.
4085

4086
  This does the shutdown on all nodes of the instance.
4087

4088
  If ignore_primary is true, errors on the primary node are
  ignored.
4090

4091
  """
4092
  all_result = True
4093
  disks = _ExpandCheckDisks(instance, disks)
4094

    
4095
  for disk in disks:
4096
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4097
      lu.cfg.SetDiskID(top_disk, node)
4098
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4099
      msg = result.fail_msg
4100
      if msg:
4101
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4102
                      disk.iv_name, node, msg)
4103
        if not ignore_primary or node != instance.primary_node:
4104
          all_result = False
4105
  return all_result
4106

    
4107

    
4108
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4109
  """Checks if a node has enough free memory.
4110

4111
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4115

4116
  @type lu: C{LogicalUnit}
4117
  @param lu: a logical unit from which we get configuration data
4118
  @type node: C{str}
4119
  @param node: the node to check
4120
  @type reason: C{str}
4121
  @param reason: string to use in the error message
4122
  @type requested: C{int}
4123
  @param requested: the amount of memory in MiB to check for
4124
  @type hypervisor_name: C{str}
4125
  @param hypervisor_name: the hypervisor to ask for memory stats
4126
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4127
      we cannot check the node
4128

4129
  """
4130
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4131
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4132
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4133
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4134
  if not isinstance(free_mem, int):
4135
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4136
                               " was '%s'" % (node, free_mem),
4137
                               errors.ECODE_ENVIRON)
4138
  if requested > free_mem:
4139
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4140
                               " needed %s MiB, available %s MiB" %
4141
                               (node, reason, requested, free_mem),
4142
                               errors.ECODE_NORES)
4143

    
4144

    
4145
def _CheckNodesFreeDisk(lu, nodenames, requested):
4146
  """Checks if nodes have enough free disk space in the default VG.
4147

4148
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
4152

4153
  @type lu: C{LogicalUnit}
4154
  @param lu: a logical unit from which we get configuration data
4155
  @type nodenames: C{list}
4156
  @param nodenames: the list of node names to check
4157
  @type requested: C{int}
4158
  @param requested: the amount of disk in MiB to check for
4159
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4160
      we cannot check the node
4161

4162
  """
4163
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4164
                                   lu.cfg.GetHypervisorType())
4165
  for node in nodenames:
4166
    info = nodeinfo[node]
4167
    info.Raise("Cannot get current information from node %s" % node,
4168
               prereq=True, ecode=errors.ECODE_ENVIRON)
4169
    vg_free = info.payload.get("vg_free", None)
4170
    if not isinstance(vg_free, int):
4171
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4172
                                 " result was '%s'" % (node, vg_free),
4173
                                 errors.ECODE_ENVIRON)
4174
    if requested > vg_free:
4175
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
4176
                                 " required %d MiB, available %d MiB" %
4177
                                 (node, requested, vg_free),
4178
                                 errors.ECODE_NORES)
4179

    
4180

    
4181
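# Illustrative sketch (not part of the original module): the common
# shape of the resource checks above (_CheckNodeFreeMemory and
# _CheckNodesFreeDisk): read a value from the node-info payload, verify
# it is an integer, then compare it against the requested amount. The
# helper and its error wording are hypothetical.
def _ExampleCheckFreeResource(node, payload, key, requested):
  free = payload.get(key, None)
  if not isinstance(free, int):
    raise errors.OpPrereqError("Can't compute '%s' on node %s, result"
                               " was '%s'" % (key, node, free),
                               errors.ECODE_ENVIRON)
  if requested > free:
    raise errors.OpPrereqError("Not enough %s on node %s: needed %s MiB,"
                               " available %s MiB" %
                               (key, node, requested, free),
                               errors.ECODE_NORES)
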
class LUStartupInstance(LogicalUnit):
4182
  """Starts an instance.
4183

4184
  """
4185
  HPATH = "instance-start"
4186
  HTYPE = constants.HTYPE_INSTANCE
4187
  _OP_REQP = ["instance_name", "force"]
4188
  REQ_BGL = False
4189

    
4190
  def ExpandNames(self):
4191
    self._ExpandAndLockInstance()
4192

    
4193
  def BuildHooksEnv(self):
4194
    """Build hooks env.
4195

4196
    This runs on master, primary and secondary nodes of the instance.
4197

4198
    """
4199
    env = {
4200
      "FORCE": self.op.force,
4201
      }
4202
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4203
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4204
    return env, nl, nl
4205

    
4206
  def CheckPrereq(self):
4207
    """Check prerequisites.
4208

4209
    This checks that the instance is in the cluster.
4210

4211
    """
4212
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4213
    assert self.instance is not None, \
4214
      "Cannot retrieve locked instance %s" % self.op.instance_name
4215

    
4216
    # extra beparams
4217
    self.beparams = getattr(self.op, "beparams", {})
4218
    if self.beparams:
4219
      if not isinstance(self.beparams, dict):
4220
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4221
                                   " dict" % (type(self.beparams), ),
4222
                                   errors.ECODE_INVAL)
4223
      # fill the beparams dict
4224
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4225
      self.op.beparams = self.beparams
4226

    
4227
    # extra hvparams
4228
    self.hvparams = getattr(self.op, "hvparams", {})
4229
    if self.hvparams:
4230
      if not isinstance(self.hvparams, dict):
4231
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4232
                                   " dict" % (type(self.hvparams), ),
4233
                                   errors.ECODE_INVAL)
4234

    
4235
      # check hypervisor parameter syntax (locally)
4236
      cluster = self.cfg.GetClusterInfo()
4237
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4238
      filled_hvp = cluster.FillHV(instance)
4239
      filled_hvp.update(self.hvparams)
4240
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4241
      hv_type.CheckParameterSyntax(filled_hvp)
4242
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4243
      self.op.hvparams = self.hvparams
4244

    
4245
    _CheckNodeOnline(self, instance.primary_node)
4246

    
4247
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4248
    # check bridges existence
4249
    _CheckInstanceBridgesExist(self, instance)
4250

    
4251
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4252
                                              instance.name,
4253
                                              instance.hypervisor)
4254
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4255
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4256
    if not remote_info.payload: # not running already
4257
      _CheckNodeFreeMemory(self, instance.primary_node,
4258
                           "starting instance %s" % instance.name,
4259
                           bep[constants.BE_MEMORY], instance.hypervisor)
4260

    
4261
  def Exec(self, feedback_fn):
4262
    """Start the instance.
4263

4264
    """
4265
    instance = self.instance
4266
    force = self.op.force
4267

    
4268
    self.cfg.MarkInstanceUp(instance.name)
4269

    
4270
    node_current = instance.primary_node
4271

    
4272
    _StartInstanceDisks(self, instance, force)
4273

    
4274
    result = self.rpc.call_instance_start(node_current, instance,
4275
                                          self.hvparams, self.beparams)
4276
    msg = result.fail_msg
4277
    if msg:
4278
      _ShutdownInstanceDisks(self, instance)
4279
      raise errors.OpExecError("Could not start instance: %s" % msg)
4280

    
4281

    
4282
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)

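# Reboot handling summary (descriptive note): soft and hard reboots are
# delegated to a single call_instance_reboot RPC, while a full reboot is
# emulated as shutdown + disk restart + call_instance_start, which is why
# only the full variant passes None for the hv/be parameter overrides.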
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)

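# Note: the shutdown LU marks the instance down in the configuration before
# issuing the RPC, so a failing call_instance_shutdown only logs a warning and
# the disks are still deactivated afterwards.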
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)

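# Note: a reinstall optionally switches the configured OS (os_type) first and
# then re-runs the OS create scripts via call_instance_os_add with the disks
# temporarily activated; the instance must be stopped throughout.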
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)

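# Disk selection semantics for the recreate operation above: an empty "disks"
# list means every disk.  A hypothetical (illustrative) opcode could pass
# disks=[] to rebuild all disks or disks=[1] to rebuild only the second one;
# indices outside range(len(instance.disks)) are rejected in CheckPrereq.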
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)

def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name

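# Note: the _RemoveInstance helper above removes the block devices, drops the
# instance from the configuration and schedules the instance lock for removal
# via lu.remove_locks; a disk removal failure is fatal unless ignore_failures
# is set.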
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output

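# Query field examples (illustrative, derived from the _FIELDS_STATIC patterns
# above): plain fields such as "name" or "oper_state", per-parameter fields
# such as "be/memory", and regex-matched indexed fields such as "disk.size/0",
# "disk.sizes", "nic.mac/1" or "nic.bridges".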
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

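# Failover summary (descriptive note): the instance is shut down on the
# primary, its disks are deactivated, primary_node is switched to the former
# secondary in the configuration, and the instance is restarted there only if
# it was marked admin_up; a network-mirrored disk template is required because
# the data must already exist on the target node.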
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

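# Note: LUMigrateInstance only sets up locking and the hook environment; the
# actual work is delegated to the TLMigrateInstance tasklet defined further
# below, which LUMigrateNode reuses to migrate every primary instance of a
# node.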
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

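# Move mechanics (descriptive note): the disks are recreated empty on the
# target node, assembled there via call_blockdev_assemble, and the data is
# copied from the source with call_blockdev_export; any assembly or copy error
# aborts the move and removes the newly created target disks.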
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)

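# Note: LUMigrateNode builds one TLMigrateInstance tasklet per primary
# instance of the node, so a node migration is effectively a batch of the
# single-instance migration sharing the node and instance locks declared
# above.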
class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


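# Illustrative note for _CreateBlockDev below: for a DRBD8 disk as built by
# _GenerateDRBD8Branch, the recursion first creates the two LV children and
# only then the DRBD device itself; devices for which CreateOnSecondary()
# is true flip force_create to True, so their whole subtree is created on
# the secondary node as well.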
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


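# Example (illustrative): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns something like ["<unique-id>.disk0", "<unique-id>.disk1"], where
# the unique id is produced by the config writer's GenerateUniqueID and is
# tied to the current execution context (ECId).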
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a logical volume name for each of the given
  extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


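# The helper below returns a single objects.Disk tree that looks roughly
# like this:
#   LD_DRBD8(size=size, logical_id=(primary, secondary, port,
#                                   p_minor, s_minor, shared_secret))
#     +- LD_LV(size=size, logical_id=(vgname, names[0]))  # data volume
#     +- LD_LV(size=128,  logical_id=(vgname, names[1]))  # 128 MB metadata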
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


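# Example: for an instance named "inst1.example.com" the helper below
# returns "originstname+inst1.example.com"; callers attach this string to
# the block devices they create (e.g. as an LVM tag) so that volumes can be
# traced back to the instance owning them.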
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


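# Worked example for the helper below, for disks = [{"size": 1024},
# {"size": 1024}]:
#   DT_PLAIN:  1024 + 1024        = 2048
#   DT_DRBD8:  (1024 + 128) * 2   = 2304  (128 MB of DRBD metadata per disk)
# while DT_DISKLESS and DT_FILE return None, as they need no volume group
# space.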
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


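# Typical usage of the helper below (see LUCreateInstance.CheckPrereq):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# Offline nodes are skipped; any other validation failure is raised.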
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


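# Typical usage of the helper below (see LUCreateInstance.CheckPrereq):
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
# A missing OS is only logged here; the 'required' flag is forwarded to the
# node-side validation call.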
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams", "osparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "disk_template", "identify_defaults"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = getattr(self.op, "source_handshake", None)
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = getattr(self.op, "source_instance_name", None)
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

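  # Illustrative example for _ReadExportParams below: if the opcode did not
  # specify any disks and the export's instance section (INISECT_INS)
  # declares disk_count = 2 together with disk0_size/disk1_size, then
  # self.op.disks becomes [{"size": <disk0_size>}, {"size": <disk1_size>}].
  # Hypervisor and backend parameters from the export are merged in the same
  # "don't override what the user specified" fashion.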
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (i.e. override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

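  # Note on _RevertToDefaults below: when the opcode was submitted with
  # identify_defaults set, any hvparam/beparam/nicparam whose value equals
  # the current cluster default is dropped again, so the new instance keeps
  # following future changes of that default instead of pinning today's
  # value.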
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of each disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
7024
        if result.fail_msg:
7025
          self.LogWarning("Failed to run rename script for %s on node"
7026
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7027

    
7028
      else:
7029
        # also checked in the prereq part
7030
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7031
                                     % self.op.mode)
7032

    
7033
    if self.op.start:
7034
      iobj.admin_up = True
7035
      self.cfg.Update(iobj, feedback_fn)
7036
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7037
      feedback_fn("* starting instance...")
7038
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7039
      result.Raise("Could not start instance")
7040

    
7041
    return list(iobj.all_nodes)
7042
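
# Illustrative sketch, not part of the original module: how the LV-adoption
# path above pairs every adopted volume with its newly generated name before
# the rename RPC is issued.  Plain (vg, lv_name) tuples stand in for
# objects.Disk; the helper name is hypothetical.
def _ExampleBuildAdoptionRenames(generated_ids, adopted_lv_names):
  """Build (old_id, new_id) rename pairs for disk adoption.

  @param generated_ids: list of (vg_name, new_lv_name) tuples, as produced
      by the disk template generation
  @param adopted_lv_names: list of existing LV names to adopt, one per disk
  @return: list of ((vg_name, old_name), (vg_name, new_name)) pairs

  """
  renames = []
  for (vg_name, new_name), old_name in zip(generated_ids, adopted_lv_names):
    renames.append(((vg_name, old_name), (vg_name, new_name)))
  return renames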

    

    
class LUConnectConsole(NoHooksLU):
7045
  """Connect to an instance's console.
7046

7047
  This is somewhat special in that it returns the command line that
7048
  you need to run on the master node in order to connect to the
7049
  console.
7050

7051
  """
7052
  _OP_REQP = ["instance_name"]
7053
  REQ_BGL = False
7054

    
7055
  def ExpandNames(self):
7056
    self._ExpandAndLockInstance()
7057

    
7058
  def CheckPrereq(self):
7059
    """Check prerequisites.
7060

7061
    This checks that the instance is in the cluster.
7062

7063
    """
7064
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7065
    assert self.instance is not None, \
7066
      "Cannot retrieve locked instance %s" % self.op.instance_name
7067
    _CheckNodeOnline(self, self.instance.primary_node)
7068

    
7069
  def Exec(self, feedback_fn):
    """Connect to the console of an instance.
7071

7072
    """
7073
    instance = self.instance
7074
    node = instance.primary_node
7075

    
7076
    node_insts = self.rpc.call_instance_list([node],
7077
                                             [instance.hypervisor])[node]
7078
    node_insts.Raise("Can't get node information from %s" % node)
7079

    
7080
    if instance.name not in node_insts.payload:
7081
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7082

    
7083
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7084

    
7085
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7086
    cluster = self.cfg.GetClusterInfo()
7087
    # beparams and hvparams are passed separately, to avoid editing the
7088
    # instance and then saving the defaults in the instance itself.
7089
    hvparams = cluster.FillHV(instance)
7090
    beparams = cluster.FillBE(instance)
7091
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7092

    
7093
    # build ssh cmdline
7094
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7095
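
# Illustrative sketch, not part of the original module: the LU above only
# computes an SSH argument list; actually opening the console is left to the
# caller.  A client could run the returned list roughly like this (helper
# name hypothetical):
def _ExampleRunConsoleCommand(argv):
  """Run a console command as returned by LUConnectConsole.

  @param argv: argument list, e.g. as built by ssh.BuildCmd

  """
  import subprocess
  return subprocess.call(argv)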

    

    
class LUReplaceDisks(LogicalUnit):
7098
  """Replace the disks of an instance.
7099

7100
  """
7101
  HPATH = "mirrors-replace"
7102
  HTYPE = constants.HTYPE_INSTANCE
7103
  _OP_REQP = ["instance_name", "mode", "disks"]
7104
  REQ_BGL = False
7105

    
7106
  def CheckArguments(self):
7107
    if not hasattr(self.op, "remote_node"):
7108
      self.op.remote_node = None
7109
    if not hasattr(self.op, "iallocator"):
7110
      self.op.iallocator = None
7111
    if not hasattr(self.op, "early_release"):
7112
      self.op.early_release = False
7113

    
7114
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7115
                                  self.op.iallocator)
7116

    
7117
  def ExpandNames(self):
7118
    self._ExpandAndLockInstance()
7119

    
7120
    if self.op.iallocator is not None:
7121
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7122

    
7123
    elif self.op.remote_node is not None:
7124
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7125
      self.op.remote_node = remote_node
7126

    
7127
      # Warning: do not remove the locking of the new secondary here
7128
      # unless DRBD8.AddChildren is changed to work in parallel;
7129
      # currently it doesn't since parallel invocations of
7130
      # FindUnusedMinor will conflict
7131
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7132
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7133

    
7134
    else:
7135
      self.needed_locks[locking.LEVEL_NODE] = []
7136
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7137

    
7138
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7139
                                   self.op.iallocator, self.op.remote_node,
7140
                                   self.op.disks, False, self.op.early_release)
7141

    
7142
    self.tasklets = [self.replacer]
7143

    
7144
  def DeclareLocks(self, level):
7145
    # If we're not already locking all nodes in the set we have to declare the
7146
    # instance's primary/secondary nodes.
7147
    if (level == locking.LEVEL_NODE and
7148
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7149
      self._LockInstancesNodes()
7150

    
7151
  def BuildHooksEnv(self):
7152
    """Build hooks env.
7153

7154
    This runs on the master, the primary and all the secondaries.
7155

7156
    """
7157
    instance = self.replacer.instance
7158
    env = {
7159
      "MODE": self.op.mode,
7160
      "NEW_SECONDARY": self.op.remote_node,
7161
      "OLD_SECONDARY": instance.secondary_nodes[0],
7162
      }
7163
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7164
    nl = [
7165
      self.cfg.GetMasterNode(),
7166
      instance.primary_node,
7167
      ]
7168
    if self.op.remote_node is not None:
7169
      nl.append(self.op.remote_node)
7170
    return env, nl, nl
7171
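
# Illustrative sketch, not part of the original module: the node-locking
# decision taken in LUReplaceDisks.ExpandNames above, reduced to a pure
# function over plain values (helper name and return convention are
# hypothetical).
def _ExampleReplaceDisksNodeLocks(iallocator, remote_node):
  """Return the node locks a disk replacement needs.

  @return: the string "all", or a (node_list, append) tuple where an empty
      list means "recalculate from the instance's own nodes"

  """
  if iallocator is not None:
    # the allocator may pick any node, so everything must be locked
    return "all"
  if remote_node is not None:
    # the new secondary is locked in addition to the instance's nodes
    return ([remote_node], True)
  # only the instance's own nodes, filled in later by _LockInstancesNodes
  return ([], False)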

    
7172

    
7173
class LUEvacuateNode(LogicalUnit):
7174
  """Relocate the secondary instances from a node.
7175

7176
  """
7177
  HPATH = "node-evacuate"
7178
  HTYPE = constants.HTYPE_NODE
7179
  _OP_REQP = ["node_name"]
7180
  REQ_BGL = False
7181

    
7182
  def CheckArguments(self):
7183
    if not hasattr(self.op, "remote_node"):
7184
      self.op.remote_node = None
7185
    if not hasattr(self.op, "iallocator"):
7186
      self.op.iallocator = None
7187
    if not hasattr(self.op, "early_release"):
7188
      self.op.early_release = False
7189

    
7190
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7191
                                  self.op.remote_node,
7192
                                  self.op.iallocator)
7193

    
7194
  def ExpandNames(self):
7195
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7196

    
7197
    self.needed_locks = {}
7198

    
7199
    # Declare node locks
7200
    if self.op.iallocator is not None:
7201
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7202

    
7203
    elif self.op.remote_node is not None:
7204
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7205

    
7206
      # Warning: do not remove the locking of the new secondary here
7207
      # unless DRBD8.AddChildren is changed to work in parallel;
7208
      # currently it doesn't since parallel invocations of
7209
      # FindUnusedMinor will conflict
7210
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7211
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7212

    
7213
    else:
7214
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7215

    
7216
    # Create tasklets for replacing disks for all secondary instances on this
7217
    # node
7218
    names = []
7219
    tasklets = []
7220

    
7221
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7222
      logging.debug("Replacing disks for instance %s", inst.name)
7223
      names.append(inst.name)
7224

    
7225
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7226
                                self.op.iallocator, self.op.remote_node, [],
7227
                                True, self.op.early_release)
7228
      tasklets.append(replacer)
7229

    
7230
    self.tasklets = tasklets
7231
    self.instance_names = names
7232

    
7233
    # Declare instance locks
7234
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7235

    
7236
  def DeclareLocks(self, level):
7237
    # If we're not already locking all nodes in the set we have to declare the
7238
    # instance's primary/secondary nodes.
7239
    if (level == locking.LEVEL_NODE and
7240
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7241
      self._LockInstancesNodes()
7242

    
7243
  def BuildHooksEnv(self):
7244
    """Build hooks env.
7245

7246
    This runs on the master, the primary and all the secondaries.
7247

7248
    """
7249
    env = {
7250
      "NODE_NAME": self.op.node_name,
7251
      }
7252

    
7253
    nl = [self.cfg.GetMasterNode()]
7254

    
7255
    if self.op.remote_node is not None:
7256
      env["NEW_SECONDARY"] = self.op.remote_node
7257
      nl.append(self.op.remote_node)
7258

    
7259
    return (env, nl, nl)
7260
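
# Illustrative sketch, not part of the original module: LUEvacuateNode does
# its work through one TLReplaceDisks tasklet per affected instance.  The
# real execution happens in the master processor; a minimal runner for such
# a list looks roughly like this (helper name hypothetical):
def _ExampleRunTasklets(tasklets, feedback_fn):
  """Run CheckPrereq and then Exec for every tasklet, in order."""
  for tl in tasklets:
    tl.CheckPrereq()
  return [tl.Exec(feedback_fn) for tl in tasklets]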

    
7261

    
7262
class TLReplaceDisks(Tasklet):
7263
  """Replaces disks for an instance.
7264

7265
  Note: Locking is not within the scope of this class.
7266

7267
  """
7268
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7269
               disks, delay_iallocator, early_release):
7270
    """Initializes this class.
7271

7272
    """
7273
    Tasklet.__init__(self, lu)
7274

    
7275
    # Parameters
7276
    self.instance_name = instance_name
7277
    self.mode = mode
7278
    self.iallocator_name = iallocator_name
7279
    self.remote_node = remote_node
7280
    self.disks = disks
7281
    self.delay_iallocator = delay_iallocator
7282
    self.early_release = early_release
7283

    
7284
    # Runtime data
7285
    self.instance = None
7286
    self.new_node = None
7287
    self.target_node = None
7288
    self.other_node = None
7289
    self.remote_node_info = None
7290
    self.node_secondary_ip = None
7291

    
7292
  @staticmethod
7293
  def CheckArguments(mode, remote_node, iallocator):
7294
    """Helper function for users of this class.
7295

7296
    """
7297
    # check for valid parameter combination
7298
    if mode == constants.REPLACE_DISK_CHG:
7299
      if remote_node is None and iallocator is None:
7300
        raise errors.OpPrereqError("When changing the secondary either an"
7301
                                   " iallocator script must be used or the"
7302
                                   " new node given", errors.ECODE_INVAL)
7303

    
7304
      if remote_node is not None and iallocator is not None:
7305
        raise errors.OpPrereqError("Give either the iallocator or the new"
7306
                                   " secondary, not both", errors.ECODE_INVAL)
7307

    
7308
    elif remote_node is not None or iallocator is not None:
7309
      # Not replacing the secondary
7310
      raise errors.OpPrereqError("The iallocator and new node options can"
7311
                                 " only be used when changing the"
7312
                                 " secondary node", errors.ECODE_INVAL)
7313

    
7314
  @staticmethod
7315
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7316
    """Compute a new secondary node using an IAllocator.
7317

7318
    """
7319
    ial = IAllocator(lu.cfg, lu.rpc,
7320
                     mode=constants.IALLOCATOR_MODE_RELOC,
7321
                     name=instance_name,
7322
                     relocate_from=relocate_from)
7323

    
7324
    ial.Run(iallocator_name)
7325

    
7326
    if not ial.success:
7327
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7328
                                 " %s" % (iallocator_name, ial.info),
7329
                                 errors.ECODE_NORES)
7330

    
7331
    if len(ial.result) != ial.required_nodes:
7332
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7333
                                 " of nodes (%s), required %s" %
7334
                                 (iallocator_name,
7335
                                  len(ial.result), ial.required_nodes),
7336
                                 errors.ECODE_FAULT)
7337

    
7338
    remote_node_name = ial.result[0]
7339

    
7340
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7341
               instance_name, remote_node_name)
7342

    
7343
    return remote_node_name
7344

    
7345
  def _FindFaultyDisks(self, node_name):
7346
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7347
                                    node_name, True)
7348

    
7349
  def CheckPrereq(self):
7350
    """Check prerequisites.
7351

7352
    This checks that the instance is in the cluster.
7353

7354
    """
7355
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7356
    assert instance is not None, \
7357
      "Cannot retrieve locked instance %s" % self.instance_name
7358

    
7359
    if instance.disk_template != constants.DT_DRBD8:
7360
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7361
                                 " instances", errors.ECODE_INVAL)
7362

    
7363
    if len(instance.secondary_nodes) != 1:
7364
      raise errors.OpPrereqError("The instance has a strange layout,"
7365
                                 " expected one secondary but found %d" %
7366
                                 len(instance.secondary_nodes),
7367
                                 errors.ECODE_FAULT)
7368

    
7369
    if not self.delay_iallocator:
7370
      self._CheckPrereq2()
7371

    
7372
  def _CheckPrereq2(self):
7373
    """Check prerequisites, second part.
7374

7375
    This function should always be part of CheckPrereq. It was separated and is
7376
    now called from Exec because during node evacuation iallocator was only
7377
    called with an unmodified cluster model, not taking planned changes into
7378
    account.
7379

7380
    """
7381
    instance = self.instance
7382
    secondary_node = instance.secondary_nodes[0]
7383

    
7384
    if self.iallocator_name is None:
7385
      remote_node = self.remote_node
7386
    else:
7387
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7388
                                       instance.name, instance.secondary_nodes)
7389

    
7390
    if remote_node is not None:
7391
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7392
      assert self.remote_node_info is not None, \
7393
        "Cannot retrieve locked node %s" % remote_node
7394
    else:
7395
      self.remote_node_info = None
7396

    
7397
    if remote_node == self.instance.primary_node:
7398
      raise errors.OpPrereqError("The specified node is the primary node of"
7399
                                 " the instance.", errors.ECODE_INVAL)
7400

    
7401
    if remote_node == secondary_node:
7402
      raise errors.OpPrereqError("The specified node is already the"
7403
                                 " secondary node of the instance.",
7404
                                 errors.ECODE_INVAL)
7405

    
7406
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7407
                                    constants.REPLACE_DISK_CHG):
7408
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7409
                                 errors.ECODE_INVAL)
7410

    
7411
    if self.mode == constants.REPLACE_DISK_AUTO:
7412
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7413
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7414

    
7415
      if faulty_primary and faulty_secondary:
7416
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7417
                                   " one node and can not be repaired"
7418
                                   " automatically" % self.instance_name,
7419
                                   errors.ECODE_STATE)
7420

    
7421
      if faulty_primary:
7422
        self.disks = faulty_primary
7423
        self.target_node = instance.primary_node
7424
        self.other_node = secondary_node
7425
        check_nodes = [self.target_node, self.other_node]
7426
      elif faulty_secondary:
7427
        self.disks = faulty_secondary
7428
        self.target_node = secondary_node
7429
        self.other_node = instance.primary_node
7430
        check_nodes = [self.target_node, self.other_node]
7431
      else:
7432
        self.disks = []
7433
        check_nodes = []
7434

    
7435
    else:
7436
      # Non-automatic modes
7437
      if self.mode == constants.REPLACE_DISK_PRI:
7438
        self.target_node = instance.primary_node
7439
        self.other_node = secondary_node
7440
        check_nodes = [self.target_node, self.other_node]
7441

    
7442
      elif self.mode == constants.REPLACE_DISK_SEC:
7443
        self.target_node = secondary_node
7444
        self.other_node = instance.primary_node
7445
        check_nodes = [self.target_node, self.other_node]
7446

    
7447
      elif self.mode == constants.REPLACE_DISK_CHG:
7448
        self.new_node = remote_node
7449
        self.other_node = instance.primary_node
7450
        self.target_node = secondary_node
7451
        check_nodes = [self.new_node, self.other_node]
7452

    
7453
        _CheckNodeNotDrained(self.lu, remote_node)
7454

    
7455
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7456
        assert old_node_info is not None
7457
        if old_node_info.offline and not self.early_release:
7458
          # doesn't make sense to delay the release
7459
          self.early_release = True
7460
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7461
                          " early-release mode", secondary_node)
7462

    
7463
      else:
7464
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7465
                                     self.mode)
7466

    
7467
      # If not specified all disks should be replaced
7468
      if not self.disks:
7469
        self.disks = range(len(self.instance.disks))
7470

    
7471
    for node in check_nodes:
7472
      _CheckNodeOnline(self.lu, node)
7473

    
7474
    # Check whether disks are valid
7475
    for disk_idx in self.disks:
7476
      instance.FindDisk(disk_idx)
7477

    
7478
    # Get secondary node IP addresses
7479
    node_2nd_ip = {}
7480

    
7481
    for node_name in [self.target_node, self.other_node, self.new_node]:
7482
      if node_name is not None:
7483
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7484

    
7485
    self.node_secondary_ip = node_2nd_ip
7486

    
7487
  def Exec(self, feedback_fn):
7488
    """Execute disk replacement.
7489

7490
    This dispatches the disk replacement to the appropriate handler.
7491

7492
    """
7493
    if self.delay_iallocator:
7494
      self._CheckPrereq2()
7495

    
7496
    if not self.disks:
7497
      feedback_fn("No disks need replacement")
7498
      return
7499

    
7500
    feedback_fn("Replacing disk(s) %s for %s" %
7501
                (utils.CommaJoin(self.disks), self.instance.name))
7502

    
7503
    activate_disks = (not self.instance.admin_up)
7504

    
7505
    # Activate the instance disks if we're replacing them on a down instance
7506
    if activate_disks:
7507
      _StartInstanceDisks(self.lu, self.instance, True)
7508

    
7509
    try:
7510
      # Should we replace the secondary node?
7511
      if self.new_node is not None:
7512
        fn = self._ExecDrbd8Secondary
7513
      else:
7514
        fn = self._ExecDrbd8DiskOnly
7515

    
7516
      return fn(feedback_fn)
7517

    
7518
    finally:
7519
      # Deactivate the instance disks if we're replacing them on a
7520
      # down instance
7521
      if activate_disks:
7522
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7523

    
7524
  def _CheckVolumeGroup(self, nodes):
7525
    self.lu.LogInfo("Checking volume groups")
7526

    
7527
    vgname = self.cfg.GetVGName()
7528

    
7529
    # Make sure volume group exists on all involved nodes
7530
    results = self.rpc.call_vg_list(nodes)
7531
    if not results:
7532
      raise errors.OpExecError("Can't list volume groups on the nodes")
7533

    
7534
    for node in nodes:
7535
      res = results[node]
7536
      res.Raise("Error checking node %s" % node)
7537
      if vgname not in res.payload:
7538
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7539
                                 (vgname, node))
7540

    
7541
  def _CheckDisksExistence(self, nodes):
7542
    # Check disk existence
7543
    for idx, dev in enumerate(self.instance.disks):
7544
      if idx not in self.disks:
7545
        continue
7546

    
7547
      for node in nodes:
7548
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7549
        self.cfg.SetDiskID(dev, node)
7550

    
7551
        result = self.rpc.call_blockdev_find(node, dev)
7552

    
7553
        msg = result.fail_msg
7554
        if msg or not result.payload:
7555
          if not msg:
7556
            msg = "disk not found"
7557
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7558
                                   (idx, node, msg))
7559

    
7560
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7561
    for idx, dev in enumerate(self.instance.disks):
7562
      if idx not in self.disks:
7563
        continue
7564

    
7565
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7566
                      (idx, node_name))
7567

    
7568
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7569
                                   ldisk=ldisk):
7570
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7571
                                 " replace disks for instance %s" %
7572
                                 (node_name, self.instance.name))
7573

    
7574
  def _CreateNewStorage(self, node_name):
7575
    vgname = self.cfg.GetVGName()
7576
    iv_names = {}
7577

    
7578
    for idx, dev in enumerate(self.instance.disks):
7579
      if idx not in self.disks:
7580
        continue
7581

    
7582
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7583

    
7584
      self.cfg.SetDiskID(dev, node_name)
7585

    
7586
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7587
      names = _GenerateUniqueNames(self.lu, lv_names)
7588

    
7589
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7590
                             logical_id=(vgname, names[0]))
7591
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7592
                             logical_id=(vgname, names[1]))
7593

    
7594
      new_lvs = [lv_data, lv_meta]
7595
      old_lvs = dev.children
7596
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7597

    
7598
      # we pass force_create=True to force the LVM creation
7599
      for new_lv in new_lvs:
7600
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7601
                        _GetInstanceInfoText(self.instance), False)
7602

    
7603
    return iv_names
7604

    
7605
  def _CheckDevices(self, node_name, iv_names):
7606
    for name, (dev, _, _) in iv_names.iteritems():
7607
      self.cfg.SetDiskID(dev, node_name)
7608

    
7609
      result = self.rpc.call_blockdev_find(node_name, dev)
7610

    
7611
      msg = result.fail_msg
7612
      if msg or not result.payload:
7613
        if not msg:
7614
          msg = "disk not found"
7615
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7616
                                 (name, msg))
7617

    
7618
      if result.payload.is_degraded:
7619
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7620

    
7621
  def _RemoveOldStorage(self, node_name, iv_names):
7622
    for name, (_, old_lvs, _) in iv_names.iteritems():
7623
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7624

    
7625
      for lv in old_lvs:
7626
        self.cfg.SetDiskID(lv, node_name)
7627

    
7628
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7629
        if msg:
7630
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7631
                             hint="remove unused LVs manually")
7632

    
7633
  def _ReleaseNodeLock(self, node_name):
7634
    """Releases the lock for a given node."""
7635
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7636

    
7637
  def _ExecDrbd8DiskOnly(self, feedback_fn):
7638
    """Replace a disk on the primary or secondary for DRBD 8.
7639

7640
    The algorithm for replace is quite complicated:
7641

7642
      1. for each disk to be replaced:
7643

7644
        1. create new LVs on the target node with unique names
7645
        1. detach old LVs from the drbd device
7646
        1. rename old LVs to name_replaced.<time_t>
7647
        1. rename new LVs to old LVs
7648
        1. attach the new LVs (with the old names now) to the drbd device
7649

7650
      1. wait for sync across all devices
7651

7652
      1. for each modified disk:
7653

7654
        1. remove old LVs (which have the name name_replaced.<time_t>)
7655

7656
    Failures are not very well handled.
7657

7658
    """
7659
    steps_total = 6
7660

    
7661
    # Step: check device activation
7662
    self.lu.LogStep(1, steps_total, "Check device existence")
7663
    self._CheckDisksExistence([self.other_node, self.target_node])
7664
    self._CheckVolumeGroup([self.target_node, self.other_node])
7665

    
7666
    # Step: check other node consistency
7667
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7668
    self._CheckDisksConsistency(self.other_node,
7669
                                self.other_node == self.instance.primary_node,
7670
                                False)
7671

    
7672
    # Step: create new storage
7673
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7674
    iv_names = self._CreateNewStorage(self.target_node)
7675

    
7676
    # Step: for each lv, detach+rename*2+attach
7677
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7678
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7679
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7680

    
7681
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7682
                                                     old_lvs)
7683
      result.Raise("Can't detach drbd from local storage on node"
7684
                   " %s for device %s" % (self.target_node, dev.iv_name))
7685
      #dev.children = []
7686
      #cfg.Update(instance)
7687

    
7688
      # ok, we created the new LVs, so now we know we have the needed
7689
      # storage; as such, we proceed on the target node to rename
7690
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7691
      # using the assumption that logical_id == physical_id (which in
7692
      # turn is the unique_id on that node)
7693

    
7694
      # FIXME(iustin): use a better name for the replaced LVs
7695
      temp_suffix = int(time.time())
7696
      ren_fn = lambda d, suff: (d.physical_id[0],
7697
                                d.physical_id[1] + "_replaced-%s" % suff)
7698

    
7699
      # Build the rename list based on what LVs exist on the node
7700
      rename_old_to_new = []
7701
      for to_ren in old_lvs:
7702
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7703
        if not result.fail_msg and result.payload:
7704
          # device exists
7705
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7706

    
7707
      self.lu.LogInfo("Renaming the old LVs on the target node")
7708
      result = self.rpc.call_blockdev_rename(self.target_node,
7709
                                             rename_old_to_new)
7710
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7711

    
7712
      # Now we rename the new LVs to the old LVs
7713
      self.lu.LogInfo("Renaming the new LVs on the target node")
7714
      rename_new_to_old = [(new, old.physical_id)
7715
                           for old, new in zip(old_lvs, new_lvs)]
7716
      result = self.rpc.call_blockdev_rename(self.target_node,
7717
                                             rename_new_to_old)
7718
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7719

    
7720
      for old, new in zip(old_lvs, new_lvs):
7721
        new.logical_id = old.logical_id
7722
        self.cfg.SetDiskID(new, self.target_node)
7723

    
7724
      for disk in old_lvs:
7725
        disk.logical_id = ren_fn(disk, temp_suffix)
7726
        self.cfg.SetDiskID(disk, self.target_node)
7727

    
7728
      # Now that the new lvs have the old name, we can add them to the device
7729
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7730
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7731
                                                  new_lvs)
7732
      msg = result.fail_msg
7733
      if msg:
7734
        for new_lv in new_lvs:
7735
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7736
                                               new_lv).fail_msg
7737
          if msg2:
7738
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
7741
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7742

    
7743
      dev.children = new_lvs
7744

    
7745
      self.cfg.Update(self.instance, feedback_fn)
7746

    
7747
    cstep = 5
7748
    if self.early_release:
7749
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7750
      cstep += 1
7751
      self._RemoveOldStorage(self.target_node, iv_names)
7752
      # WARNING: we release both node locks here, do not do other RPCs
7753
      # than WaitForSync to the primary node
7754
      self._ReleaseNodeLock([self.target_node, self.other_node])
7755

    
7756
    # Wait for sync
7757
    # This can fail as the old devices are degraded and _WaitForSync
7758
    # does a combined result over all disks, so we don't check its return value
7759
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7760
    cstep += 1
7761
    _WaitForSync(self.lu, self.instance)
7762

    
7763
    # Check all devices manually
7764
    self._CheckDevices(self.instance.primary_node, iv_names)
7765

    
7766
    # Step: remove old storage
7767
    if not self.early_release:
7768
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7769
      cstep += 1
7770
      self._RemoveOldStorage(self.target_node, iv_names)
7771

    
7772
  def _ExecDrbd8Secondary(self, feedback_fn):
7773
    """Replace the secondary node for DRBD 8.
7774

7775
    The algorithm for replace is quite complicated:
7776
      - for all disks of the instance:
7777
        - create new LVs on the new node with same names
7778
        - shutdown the drbd device on the old secondary
7779
        - disconnect the drbd network on the primary
7780
        - create the drbd device on the new secondary
7781
        - network attach the drbd on the primary, using an artifice:
7782
          the drbd code for Attach() will connect to the network if it
7783
          finds a device which is connected to the good local disks but
7784
          not network enabled
7785
      - wait for sync across all devices
7786
      - remove all disks from the old secondary
7787

7788
    Failures are not very well handled.
7789

7790
    """
7791
    steps_total = 6
7792

    
7793
    # Step: check device activation
7794
    self.lu.LogStep(1, steps_total, "Check device existence")
7795
    self._CheckDisksExistence([self.instance.primary_node])
7796
    self._CheckVolumeGroup([self.instance.primary_node])
7797

    
7798
    # Step: check other node consistency
7799
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7800
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7801

    
7802
    # Step: create new storage
7803
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7804
    for idx, dev in enumerate(self.instance.disks):
7805
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7806
                      (self.new_node, idx))
7807
      # we pass force_create=True to force LVM creation
7808
      for new_lv in dev.children:
7809
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7810
                        _GetInstanceInfoText(self.instance), False)
7811

    
7812
    # Step 4: drbd minors and drbd setup changes
7813
    # after this, we must manually remove the drbd minors on both the
7814
    # error and the success paths
7815
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7816
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7817
                                         for dev in self.instance.disks],
7818
                                        self.instance.name)
7819
    logging.debug("Allocated minors %r", minors)
7820

    
7821
    iv_names = {}
7822
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7823
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
7824
                      (self.new_node, idx))
7825
      # create new devices on new_node; note that we create two IDs:
7826
      # one without port, so the drbd will be activated without
7827
      # networking information on the new node at this stage, and one
7828
      # with network, for the latter activation in step 4
7829
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7830
      if self.instance.primary_node == o_node1:
7831
        p_minor = o_minor1
7832
      else:
7833
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7834
        p_minor = o_minor2
7835

    
7836
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7837
                      p_minor, new_minor, o_secret)
7838
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7839
                    p_minor, new_minor, o_secret)
7840

    
7841
      iv_names[idx] = (dev, dev.children, new_net_id)
7842
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7843
                    new_net_id)
7844
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7845
                              logical_id=new_alone_id,
7846
                              children=dev.children,
7847
                              size=dev.size)
7848
      try:
7849
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7850
                              _GetInstanceInfoText(self.instance), False)
7851
      except errors.GenericError:
7852
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7853
        raise
7854

    
7855
    # We have new devices, shutdown the drbd on the old secondary
7856
    for idx, dev in enumerate(self.instance.disks):
7857
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7858
      self.cfg.SetDiskID(dev, self.target_node)
7859
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7860
      if msg:
7861
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
7863
                           hint=("Please cleanup this device manually as"
7864
                                 " soon as possible"))
7865

    
7866
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7867
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7868
                                               self.node_secondary_ip,
7869
                                               self.instance.disks)\
7870
                                              [self.instance.primary_node]
7871

    
7872
    msg = result.fail_msg
7873
    if msg:
7874
      # detaches didn't succeed (unlikely)
7875
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7876
      raise errors.OpExecError("Can't detach the disks from the network on"
7877
                               " old node: %s" % (msg,))
7878

    
7879
    # if we managed to detach at least one, we update all the disks of
7880
    # the instance to point to the new secondary
7881
    self.lu.LogInfo("Updating instance configuration")
7882
    for dev, _, new_logical_id in iv_names.itervalues():
7883
      dev.logical_id = new_logical_id
7884
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7885

    
7886
    self.cfg.Update(self.instance, feedback_fn)
7887

    
7888
    # and now perform the drbd attach
7889
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7890
                    " (standalone => connected)")
7891
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7892
                                            self.new_node],
7893
                                           self.node_secondary_ip,
7894
                                           self.instance.disks,
7895
                                           self.instance.name,
7896
                                           False)
7897
    for to_node, to_result in result.items():
7898
      msg = to_result.fail_msg
7899
      if msg:
7900
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7901
                           to_node, msg,
7902
                           hint=("please do a gnt-instance info to see the"
7903
                                 " status of disks"))
7904
    cstep = 5
7905
    if self.early_release:
7906
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7907
      cstep += 1
7908
      self._RemoveOldStorage(self.target_node, iv_names)
7909
      # WARNING: we release all node locks here, do not do other RPCs
7910
      # than WaitForSync to the primary node
7911
      self._ReleaseNodeLock([self.instance.primary_node,
7912
                             self.target_node,
7913
                             self.new_node])
7914

    
7915
    # Wait for sync
7916
    # This can fail as the old devices are degraded and _WaitForSync
7917
    # does a combined result over all disks, so we don't check its return value
7918
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7919
    cstep += 1
7920
    _WaitForSync(self.lu, self.instance)
7921

    
7922
    # Check all devices manually
7923
    self._CheckDevices(self.instance.primary_node, iv_names)
7924

    
7925
    # Step: remove old storage
7926
    if not self.early_release:
7927
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7928
      self._RemoveOldStorage(self.target_node, iv_names)
7929
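
# Illustrative sketch, not part of the original module: the double rename
# performed in _ExecDrbd8DiskOnly above, shown on plain (vg, lv_name) tuples.
# The old LVs first move out of the way under a timestamped suffix, then the
# freshly created LVs take over the old names, so the DRBD device keeps its
# expected backing names (helper name hypothetical).
def _ExampleLvSwapRenames(old_ids, new_ids, suffix):
  """Build the two rename lists used when swapping LVs under DRBD.

  @param old_ids: current (vg, name) pairs of the LVs being replaced
  @param new_ids: (vg, name) pairs of the newly created LVs
  @param suffix: unique suffix, e.g. int(time.time())
  @return: tuple of (old_to_temp, new_to_old) rename pair lists

  """
  old_to_temp = [((vg, name), (vg, "%s_replaced-%s" % (name, suffix)))
                 for (vg, name) in old_ids]
  new_to_old = [(new_id, old_id) for old_id, new_id in zip(old_ids, new_ids)]
  return (old_to_temp, new_to_old)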

    

    
class LURepairNodeStorage(NoHooksLU):
7932
  """Repairs the volume group on a node.
7933

7934
  """
7935
  _OP_REQP = ["node_name"]
7936
  REQ_BGL = False
7937

    
7938
  def CheckArguments(self):
7939
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7940

    
7941
    _CheckStorageType(self.op.storage_type)
7942

    
7943
  def ExpandNames(self):
7944
    self.needed_locks = {
7945
      locking.LEVEL_NODE: [self.op.node_name],
7946
      }
7947

    
7948
  def _CheckFaultyDisks(self, instance, node_name):
7949
    """Ensure faulty disks abort the opcode or at least warn."""
7950
    try:
7951
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7952
                                  node_name, True):
7953
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7954
                                   " node '%s'" % (instance.name, node_name),
7955
                                   errors.ECODE_STATE)
7956
    except errors.OpPrereqError, err:
7957
      if self.op.ignore_consistency:
7958
        self.proc.LogWarning(str(err.args[0]))
7959
      else:
7960
        raise
7961

    
7962
  def CheckPrereq(self):
7963
    """Check prerequisites.
7964

7965
    """
7966
    storage_type = self.op.storage_type
7967

    
7968
    if (constants.SO_FIX_CONSISTENCY not in
7969
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7970
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7971
                                 " repaired" % storage_type,
7972
                                 errors.ECODE_INVAL)
7973

    
7974
    # Check whether any instance on this node has faulty disks
7975
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7976
      if not inst.admin_up:
7977
        continue
7978
      check_nodes = set(inst.all_nodes)
7979
      check_nodes.discard(self.op.node_name)
7980
      for inst_node_name in check_nodes:
7981
        self._CheckFaultyDisks(inst, inst_node_name)
7982

    
7983
  def Exec(self, feedback_fn):
7984
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7985
                (self.op.name, self.op.node_name))
7986

    
7987
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7988
    result = self.rpc.call_storage_execute(self.op.node_name,
7989
                                           self.op.storage_type, st_args,
7990
                                           self.op.name,
7991
                                           constants.SO_FIX_CONSISTENCY)
7992
    result.Raise("Failed to repair storage unit '%s' on %s" %
7993
                 (self.op.name, self.op.node_name))
7994
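
# Illustrative sketch, not part of the original module: the "can this storage
# type be repaired" test used in LURepairNodeStorage.CheckPrereq above,
# written against a plain mapping of valid operations per storage type (the
# real table is constants.VALID_STORAGE_OPERATIONS; the helper name and the
# "fix-consistency" literal are stand-ins).
def _ExampleCanRepairStorage(storage_type, valid_ops):
  """Return True if the storage type supports the consistency-fix operation."""
  return "fix-consistency" in valid_ops.get(storage_type, [])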

    
7995

    
7996
class LUNodeEvacuationStrategy(NoHooksLU):
7997
  """Computes the node evacuation strategy.
7998

7999
  """
8000
  _OP_REQP = ["nodes"]
8001
  REQ_BGL = False
8002

    
8003
  def CheckArguments(self):
8004
    if not hasattr(self.op, "remote_node"):
8005
      self.op.remote_node = None
8006
    if not hasattr(self.op, "iallocator"):
8007
      self.op.iallocator = None
8008
    if self.op.remote_node is not None and self.op.iallocator is not None:
8009
      raise errors.OpPrereqError("Give either the iallocator or the new"
8010
                                 " secondary, not both", errors.ECODE_INVAL)
8011

    
8012
  def ExpandNames(self):
8013
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8014
    self.needed_locks = locks = {}
8015
    if self.op.remote_node is None:
8016
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8017
    else:
8018
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8019
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8020

    
8021
  def CheckPrereq(self):
8022
    pass
8023

    
8024
  def Exec(self, feedback_fn):
8025
    if self.op.remote_node is not None:
8026
      instances = []
8027
      for node in self.op.nodes:
8028
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8029
      result = []
8030
      for i in instances:
8031
        if i.primary_node == self.op.remote_node:
8032
          raise errors.OpPrereqError("Node %s is the primary node of"
8033
                                     " instance %s, cannot use it as"
8034
                                     " secondary" %
8035
                                     (self.op.remote_node, i.name),
8036
                                     errors.ECODE_INVAL)
8037
        result.append([i.name, self.op.remote_node])
8038
    else:
8039
      ial = IAllocator(self.cfg, self.rpc,
8040
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8041
                       evac_nodes=self.op.nodes)
8042
      ial.Run(self.op.iallocator, validate=True)
8043
      if not ial.success:
8044
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8045
                                 errors.ECODE_NORES)
8046
      result = ial.result
8047
    return result
8048
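
# Illustrative sketch, not part of the original module: the shape of the
# result computed above when an explicit target node is given -- a list of
# [instance_name, new_secondary] pairs, refusing instances whose primary node
# already is the target (helper name hypothetical).
def _ExampleEvacuationPairs(secondary_instances, target_node):
  """Map evacuated instances to their new secondary node.

  @param secondary_instances: list of (instance_name, primary_node) tuples
  @param target_node: node that should become the new secondary

  """
  result = []
  for name, primary_node in secondary_instances:
    if primary_node == target_node:
      raise ValueError("%s already uses %s as its primary node" %
                       (name, target_node))
    result.append([name, target_node])
  return result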

    
8049

    
8050
class LUGrowDisk(LogicalUnit):
8051
  """Grow a disk of an instance.
8052

8053
  """
8054
  HPATH = "disk-grow"
8055
  HTYPE = constants.HTYPE_INSTANCE
8056
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8057
  REQ_BGL = False
8058

    
8059
  def ExpandNames(self):
8060
    self._ExpandAndLockInstance()
8061
    self.needed_locks[locking.LEVEL_NODE] = []
8062
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8063

    
8064
  def DeclareLocks(self, level):
8065
    if level == locking.LEVEL_NODE:
8066
      self._LockInstancesNodes()
8067

    
8068
  def BuildHooksEnv(self):
8069
    """Build hooks env.
8070

8071
    This runs on the master, the primary and all the secondaries.
8072

8073
    """
8074
    env = {
8075
      "DISK": self.op.disk,
8076
      "AMOUNT": self.op.amount,
8077
      }
8078
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8079
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8080
    return env, nl, nl
8081

    
8082
  def CheckPrereq(self):
8083
    """Check prerequisites.
8084

8085
    This checks that the instance is in the cluster.
8086

8087
    """
8088
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8089
    assert instance is not None, \
8090
      "Cannot retrieve locked instance %s" % self.op.instance_name
8091
    nodenames = list(instance.all_nodes)
8092
    for node in nodenames:
8093
      _CheckNodeOnline(self, node)
8094

    
8095

    
8096
    self.instance = instance
8097

    
8098
    if instance.disk_template not in constants.DTS_GROWABLE:
8099
      raise errors.OpPrereqError("Instance's disk layout does not support"
8100
                                 " growing.", errors.ECODE_INVAL)
8101

    
8102
    self.disk = instance.FindDisk(self.op.disk)
8103

    
8104
    if instance.disk_template != constants.DT_FILE:
8105
      # TODO: check the free disk space for file, when that feature will be
8106
      # supported
8107
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8108

    
8109
  def Exec(self, feedback_fn):
8110
    """Execute disk grow.
8111

8112
    """
8113
    instance = self.instance
8114
    disk = self.disk
8115

    
8116
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8117
    if not disks_ok:
8118
      raise errors.OpExecError("Cannot activate block device to grow")
8119

    
8120
    for node in instance.all_nodes:
8121
      self.cfg.SetDiskID(disk, node)
8122
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8123
      result.Raise("Grow request failed to node %s" % node)
8124

    
8125
      # TODO: Rewrite code to work properly
8126
      # DRBD goes into sync mode for a short amount of time after executing the
8127
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8128
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8129
      # time is a work-around.
8130
      time.sleep(5)
8131

    
8132
    disk.RecordGrow(self.op.amount)
8133
    self.cfg.Update(instance, feedback_fn)
8134
    if self.op.wait_for_sync:
8135
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8136
      if disk_abort:
8137
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8138
                             " status.\nPlease check the instance.")
8139
      if not instance.admin_up:
8140
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8141
    elif not instance.admin_up:
8142
      self.proc.LogWarning("Not shutting down the disk even though the"
                           " instance is not supposed to be running because"
                           " no wait for sync mode was requested.")
8145
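
# Illustrative sketch, not part of the original module: the ordering that
# LUGrowDisk.Exec above relies on -- every node must acknowledge the grow
# request before the new size is recorded in the configuration (helper name
# and callables are hypothetical).
def _ExampleGrowOnAllNodes(nodes, grow_fn, record_fn, amount):
  """Grow a disk on every node, then record the new size.

  @param grow_fn: callable(node, amount) that raises on failure
  @param record_fn: callable(amount) that updates the stored disk size

  """
  for node in nodes:
    # any failure aborts before the configuration is touched
    grow_fn(node, amount)
  record_fn(amount)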

    
8146

    
8147
class LUQueryInstanceData(NoHooksLU):
8148
  """Query runtime instance data.
8149

8150
  """
8151
  _OP_REQP = ["instances", "static"]
8152
  REQ_BGL = False
8153

    
8154
  def ExpandNames(self):
8155
    self.needed_locks = {}
8156
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8157

    
8158
    if not isinstance(self.op.instances, list):
8159
      raise errors.OpPrereqError("Invalid argument type 'instances'",
8160
                                 errors.ECODE_INVAL)
8161

    
8162
    if self.op.instances:
8163
      self.wanted_names = []
8164
      for name in self.op.instances:
8165
        full_name = _ExpandInstanceName(self.cfg, name)
8166
        self.wanted_names.append(full_name)
8167
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8168
    else:
8169
      self.wanted_names = None
8170
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8171

    
8172
    self.needed_locks[locking.LEVEL_NODE] = []
8173
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8174

    
8175
  def DeclareLocks(self, level):
8176
    if level == locking.LEVEL_NODE:
8177
      self._LockInstancesNodes()
8178

    
8179
  def CheckPrereq(self):
8180
    """Check prerequisites.
8181

8182
    This only checks the optional instance list against the existing names.
8183

8184
    """
8185
    if self.wanted_names is None:
8186
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8187

    
8188
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8189
                             in self.wanted_names]
8190
    return
8191

    
8192
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8193
    """Returns the status of a block device
8194

8195
    """
8196
    if self.op.static or not node:
8197
      return None
8198

    
8199
    self.cfg.SetDiskID(dev, node)
8200

    
8201
    result = self.rpc.call_blockdev_find(node, dev)
8202
    if result.offline:
8203
      return None
8204

    
8205
    result.Raise("Can't compute disk status for %s" % instance_name)
8206

    
8207
    status = result.payload
8208
    if status is None:
8209
      return None
8210

    
8211
    return (status.dev_path, status.major, status.minor,
8212
            status.sync_percent, status.estimated_time,
8213
            status.is_degraded, status.ldisk_status)
8214

    
8215
  def _ComputeDiskStatus(self, instance, snode, dev):
8216
    """Compute block device status.
8217

8218
    """
8219
    if dev.dev_type in constants.LDS_DRBD:
8220
      # we change the snode then (otherwise we use the one passed in)
8221
      if dev.logical_id[0] == instance.primary_node:
8222
        snode = dev.logical_id[1]
8223
      else:
8224
        snode = dev.logical_id[0]
8225

    
8226
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8227
                                              instance.name, dev)
8228
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8229

    
8230
    if dev.children:
8231
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8232
                      for child in dev.children]
8233
    else:
8234
      dev_children = []
8235

    
8236
    data = {
8237
      "iv_name": dev.iv_name,
8238
      "dev_type": dev.dev_type,
8239
      "logical_id": dev.logical_id,
8240
      "physical_id": dev.physical_id,
8241
      "pstatus": dev_pstatus,
8242
      "sstatus": dev_sstatus,
8243
      "children": dev_children,
8244
      "mode": dev.mode,
8245
      "size": dev.size,
8246
      }
8247

    
8248
    return data
8249

    
8250
  def Exec(self, feedback_fn):
8251
    """Gather and return data"""
8252
    result = {}
8253

    
8254
    cluster = self.cfg.GetClusterInfo()
8255

    
8256
    for instance in self.wanted_instances:
8257
      if not self.op.static:
8258
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8259
                                                  instance.name,
8260
                                                  instance.hypervisor)
8261
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8262
        remote_info = remote_info.payload
8263
        if remote_info and "state" in remote_info:
8264
          remote_state = "up"
8265
        else:
8266
          remote_state = "down"
8267
      else:
8268
        remote_state = None
8269
      if instance.admin_up:
8270
        config_state = "up"
8271
      else:
8272
        config_state = "down"
8273

    
8274
      disks = [self._ComputeDiskStatus(instance, None, device)
8275
               for device in instance.disks]
8276

    
8277
      idict = {
8278
        "name": instance.name,
8279
        "config_state": config_state,
8280
        "run_state": remote_state,
8281
        "pnode": instance.primary_node,
8282
        "snodes": instance.secondary_nodes,
8283
        "os": instance.os,
8284
        # this happens to be the same format used for hooks
8285
        "nics": _NICListToTuple(self, instance.nics),
8286
        "disk_template": instance.disk_template,
8287
        "disks": disks,
8288
        "hypervisor": instance.hypervisor,
8289
        "network_port": instance.network_port,
8290
        "hv_instance": instance.hvparams,
8291
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8292
        "be_instance": instance.beparams,
8293
        "be_actual": cluster.FillBE(instance),
8294
        "os_instance": instance.osparams,
8295
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8296
        "serial_no": instance.serial_no,
8297
        "mtime": instance.mtime,
8298
        "ctime": instance.ctime,
8299
        "uuid": instance.uuid,
8300
        }
8301

    
8302
      result[instance.name] = idict
8303

    
8304
    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    if not hasattr(self.op, "osparams"):
      self.op.osparams = None
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
8341
      _CheckGlobalHvParams(self.op.hvparams)
8342

    
8343
    # Disk validation
8344
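    # Illustrative only: each self.op.disks entry is an (op, params) pair,
    # e.g. (constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})
    # to add a disk, (constants.DDM_REMOVE, {}) to drop the last disk, or
    # (0, {"mode": "ro"}) to change the access mode of disk 0 in place.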
    disk_addremove = 0
8345
    for disk_op, disk_dict in self.op.disks:
8346
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8347
      if disk_op == constants.DDM_REMOVE:
8348
        disk_addremove += 1
8349
        continue
8350
      elif disk_op == constants.DDM_ADD:
8351
        disk_addremove += 1
8352
      else:
8353
        if not isinstance(disk_op, int):
8354
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8355
        if not isinstance(disk_dict, dict):
8356
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8357
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8358

    
8359
      if disk_op == constants.DDM_ADD:
8360
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8361
        if mode not in constants.DISK_ACCESS_SET:
8362
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8363
                                     errors.ECODE_INVAL)
8364
        size = disk_dict.get('size', None)
8365
        if size is None:
8366
          raise errors.OpPrereqError("Required disk parameter size missing",
8367
                                     errors.ECODE_INVAL)
8368
        try:
8369
          size = int(size)
8370
        except (TypeError, ValueError), err:
8371
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8372
                                     str(err), errors.ECODE_INVAL)
8373
        disk_dict['size'] = size
8374
      else:
8375
        # modification of disk
8376
        if 'size' in disk_dict:
8377
          raise errors.OpPrereqError("Disk size change not possible, use"
8378
                                     " grow-disk", errors.ECODE_INVAL)
8379

    
8380
    if disk_addremove > 1:
8381
      raise errors.OpPrereqError("Only one disk add or remove operation"
8382
                                 " supported at a time", errors.ECODE_INVAL)
8383

    
8384
    if self.op.disks and self.op.disk_template is not None:
8385
      raise errors.OpPrereqError("Disk template conversion and other disk"
8386
                                 " changes not supported at the same time",
8387
                                 errors.ECODE_INVAL)
8388

    
8389
    if self.op.disk_template:
8390
      _CheckDiskTemplate(self.op.disk_template)
8391
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8392
          self.op.remote_node is None):
8393
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8394
                                   " one requires specifying a secondary node",
8395
                                   errors.ECODE_INVAL)
8396

    
8397
    # NIC validation
8398
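    # Illustrative only: self.op.nics uses the same (op, params) convention,
    # e.g. (constants.DDM_ADD, {"mac": constants.VALUE_AUTO, "link": "br0"})
    # or (1, {"ip": "none"}) to clear the IP of NIC 1; "br0" is merely a
    # placeholder bridge/link name.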
    nic_addremove = 0
8399
    for nic_op, nic_dict in self.op.nics:
8400
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8401
      if nic_op == constants.DDM_REMOVE:
8402
        nic_addremove += 1
8403
        continue
8404
      elif nic_op == constants.DDM_ADD:
8405
        nic_addremove += 1
8406
      else:
8407
        if not isinstance(nic_op, int):
8408
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8409
        if not isinstance(nic_dict, dict):
8410
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8411
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8412

    
8413
      # nic_dict should be a dict
8414
      nic_ip = nic_dict.get('ip', None)
8415
      if nic_ip is not None:
8416
        if nic_ip.lower() == constants.VALUE_NONE:
8417
          nic_dict['ip'] = None
8418
        else:
8419
          if not utils.IsValidIP(nic_ip):
8420
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8421
                                       errors.ECODE_INVAL)
8422

    
8423
      nic_bridge = nic_dict.get('bridge', None)
8424
      nic_link = nic_dict.get('link', None)
8425
      if nic_bridge and nic_link:
8426
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8427
                                   " at the same time", errors.ECODE_INVAL)
8428
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8429
        nic_dict['bridge'] = None
8430
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8431
        nic_dict['link'] = None
8432

    
8433
      if nic_op == constants.DDM_ADD:
8434
        nic_mac = nic_dict.get('mac', None)
8435
        if nic_mac is None:
8436
          nic_dict['mac'] = constants.VALUE_AUTO
8437

    
8438
      if 'mac' in nic_dict:
8439
        nic_mac = nic_dict['mac']
8440
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8441
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8442

    
8443
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8444
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8445
                                     " modifying an existing nic",
8446
                                     errors.ECODE_INVAL)
8447

    
8448
    if nic_addremove > 1:
8449
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8450
                                 " supported at a time", errors.ECODE_INVAL)
8451

    
8452
  def ExpandNames(self):
8453
    self._ExpandAndLockInstance()
8454
    self.needed_locks[locking.LEVEL_NODE] = []
8455
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8456

    
8457
  def DeclareLocks(self, level):
8458
    if level == locking.LEVEL_NODE:
8459
      self._LockInstancesNodes()
8460
      if self.op.disk_template and self.op.remote_node:
8461
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8462
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8463

    
8464
  def BuildHooksEnv(self):
8465
    """Build hooks env.
8466

8467
    This runs on the master, primary and secondaries.
8468

8469
    """
8470
    args = dict()
8471
    if constants.BE_MEMORY in self.be_new:
8472
      args['memory'] = self.be_new[constants.BE_MEMORY]
8473
    if constants.BE_VCPUS in self.be_new:
8474
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8475
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8476
    # information at all.
8477
    if self.op.nics:
8478
      args['nics'] = []
8479
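      # each entry appended below is an (ip, mac, mode, link) tuple, the same
      # layout the instance hooks environment uses for NICs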
      nic_override = dict(self.op.nics)
8480
      for idx, nic in enumerate(self.instance.nics):
8481
        if idx in nic_override:
8482
          this_nic_override = nic_override[idx]
8483
        else:
8484
          this_nic_override = {}
8485
        if 'ip' in this_nic_override:
8486
          ip = this_nic_override['ip']
8487
        else:
8488
          ip = nic.ip
8489
        if 'mac' in this_nic_override:
8490
          mac = this_nic_override['mac']
8491
        else:
8492
          mac = nic.mac
8493
        if idx in self.nic_pnew:
8494
          nicparams = self.nic_pnew[idx]
8495
        else:
8496
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8497
        mode = nicparams[constants.NIC_MODE]
8498
        link = nicparams[constants.NIC_LINK]
8499
        args['nics'].append((ip, mac, mode, link))
8500
      if constants.DDM_ADD in nic_override:
8501
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8502
        mac = nic_override[constants.DDM_ADD]['mac']
8503
        nicparams = self.nic_pnew[constants.DDM_ADD]
8504
        mode = nicparams[constants.NIC_MODE]
8505
        link = nicparams[constants.NIC_LINK]
8506
        args['nics'].append((ip, mac, mode, link))
8507
      elif constants.DDM_REMOVE in nic_override:
8508
        del args['nics'][-1]
8509

    
8510
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8511
    if self.op.disk_template:
8512
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8513
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8514
    return env, nl, nl
8515

    
8516
  def CheckPrereq(self):
8517
    """Check prerequisites.
8518

8519
    This only checks the instance list against the existing names.
8520

8521
    """
8522
    self.force = self.op.force
8523

    
8524
    # checking the new params on the primary/secondary nodes
8525

    
8526
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8527
    cluster = self.cluster = self.cfg.GetClusterInfo()
8528
    assert self.instance is not None, \
8529
      "Cannot retrieve locked instance %s" % self.op.instance_name
8530
    pnode = instance.primary_node
8531
    nodelist = list(instance.all_nodes)
8532

    
8533
    # OS change
8534
    if self.op.os_name and not self.op.force:
8535
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8536
                      self.op.force_variant)
8537
      instance_os = self.op.os_name
8538
    else:
8539
      instance_os = instance.os
8540

    
8541
    if self.op.disk_template:
8542
      if instance.disk_template == self.op.disk_template:
8543
        raise errors.OpPrereqError("Instance already has disk template %s" %
8544
                                   instance.disk_template, errors.ECODE_INVAL)
8545

    
8546
      if (instance.disk_template,
8547
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8548
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8549
                                   " %s to %s" % (instance.disk_template,
8550
                                                  self.op.disk_template),
8551
                                   errors.ECODE_INVAL)
8552
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8553
        _CheckNodeOnline(self, self.op.remote_node)
8554
        _CheckNodeNotDrained(self, self.op.remote_node)
8555
        disks = [{"size": d.size} for d in instance.disks]
8556
        required = _ComputeDiskSize(self.op.disk_template, disks)
8557
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8558
        _CheckInstanceDown(self, instance, "cannot change disk template")
8559

    
8560
    # hvparams processing
8561
    if self.op.hvparams:
8562
      hv_type = instance.hypervisor
8563
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8564
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8565
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8566

    
8567
      # local check
8568
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8569
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8570
      self.hv_new = hv_new # the new actual values
8571
      self.hv_inst = i_hvdict # the new dict (without defaults)
8572
    else:
8573
      self.hv_new = self.hv_inst = {}
8574

    
8575
    # beparams processing
8576
    if self.op.beparams:
8577
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8578
                                   use_none=True)
8579
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8580
      be_new = cluster.SimpleFillBE(i_bedict)
8581
      self.be_new = be_new # the new actual values
8582
      self.be_inst = i_bedict # the new dict (without defaults)
8583
    else:
8584
      self.be_new = self.be_inst = {}
8585

    
8586
    # osparams processing
8587
    if self.op.osparams:
8588
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8589
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8590
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8591
      self.os_inst = i_osdict # the new dict (without defaults)
8592
    else:
8593
      self.os_new = self.os_inst = {}
8594

    
8595
    self.warn = []
8596

    
8597
    if constants.BE_MEMORY in self.op.beparams and not self.force:
8598
      mem_check_list = [pnode]
8599
      if be_new[constants.BE_AUTO_BALANCE]:
8600
        # either we changed auto_balance to yes or it was from before
8601
        mem_check_list.extend(instance.secondary_nodes)
8602
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8603
                                                  instance.hypervisor)
8604
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8605
                                         instance.hypervisor)
8606
      pninfo = nodeinfo[pnode]
8607
      msg = pninfo.fail_msg
8608
      if msg:
8609
        # Assume the primary node is unreachable and go ahead
8610
        self.warn.append("Can't get info from primary node %s: %s" %
8611
                         (pnode,  msg))
8612
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8613
        self.warn.append("Node data from primary node %s doesn't contain"
8614
                         " free memory information" % pnode)
8615
      elif instance_info.fail_msg:
8616
        self.warn.append("Can't get instance runtime information: %s" %
8617
                        instance_info.fail_msg)
8618
      else:
8619
        if instance_info.payload:
8620
          current_mem = int(instance_info.payload['memory'])
8621
        else:
8622
          # Assume instance not running
8623
          # (there is a slight race condition here, but it's not very probable,
8624
          # and we have no other way to check)
8625
          current_mem = 0
8626
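        # reject the change if the extra memory needed (new setting minus what
        # the instance currently uses) exceeds the free memory reported by the
        # primary node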
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8627
                    pninfo.payload['memory_free'])
8628
        if miss_mem > 0:
8629
          raise errors.OpPrereqError("This change will prevent the instance"
8630
                                     " from starting, due to %d MB of memory"
8631
                                     " missing on its primary node" % miss_mem,
8632
                                     errors.ECODE_NORES)
8633

    
8634
      if be_new[constants.BE_AUTO_BALANCE]:
8635
        for node, nres in nodeinfo.items():
8636
          if node not in instance.secondary_nodes:
8637
            continue
8638
          msg = nres.fail_msg
8639
          if msg:
8640
            self.warn.append("Can't get info from secondary node %s: %s" %
8641
                             (node, msg))
8642
          elif not isinstance(nres.payload.get('memory_free', None), int):
8643
            self.warn.append("Secondary node %s didn't return free"
8644
                             " memory information" % node)
8645
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8646
            self.warn.append("Not enough memory to failover instance to"
8647
                             " secondary node %s" % node)
8648

    
8649
    # NIC processing
8650
    self.nic_pnew = {}
8651
    self.nic_pinst = {}
8652
    for nic_op, nic_dict in self.op.nics:
8653
      if nic_op == constants.DDM_REMOVE:
8654
        if not instance.nics:
8655
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8656
                                     errors.ECODE_INVAL)
8657
        continue
8658
      if nic_op != constants.DDM_ADD:
8659
        # an existing nic
8660
        if not instance.nics:
8661
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8662
                                     " no NICs" % nic_op,
8663
                                     errors.ECODE_INVAL)
8664
        if nic_op < 0 or nic_op >= len(instance.nics):
8665
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8666
                                     " are 0 to %d" %
8667
                                     (nic_op, len(instance.nics) - 1),
8668
                                     errors.ECODE_INVAL)
8669
        old_nic_params = instance.nics[nic_op].nicparams
8670
        old_nic_ip = instance.nics[nic_op].ip
8671
      else:
8672
        old_nic_params = {}
8673
        old_nic_ip = None
8674

    
8675
      update_params_dict = dict([(key, nic_dict[key])
8676
                                 for key in constants.NICS_PARAMETERS
8677
                                 if key in nic_dict])
8678

    
8679
      if 'bridge' in nic_dict:
8680
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8681

    
8682
      new_nic_params = _GetUpdatedParams(old_nic_params,
8683
                                         update_params_dict)
8684
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8685
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8686
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8687
      self.nic_pinst[nic_op] = new_nic_params
8688
      self.nic_pnew[nic_op] = new_filled_nic_params
8689
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8690

    
8691
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8692
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8693
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8694
        if msg:
8695
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8696
          if self.force:
8697
            self.warn.append(msg)
8698
          else:
8699
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8700
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8701
        if 'ip' in nic_dict:
8702
          nic_ip = nic_dict['ip']
8703
        else:
8704
          nic_ip = old_nic_ip
8705
        if nic_ip is None:
8706
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8707
                                     ' on a routed nic', errors.ECODE_INVAL)
8708
      if 'mac' in nic_dict:
8709
        nic_mac = nic_dict['mac']
8710
        if nic_mac is None:
8711
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8712
                                     errors.ECODE_INVAL)
8713
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8714
          # otherwise generate the mac
8715
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8716
        else:
8717
          # or validate/reserve the current one
8718
          try:
8719
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8720
          except errors.ReservationError:
8721
            raise errors.OpPrereqError("MAC address %s already in use"
8722
                                       " in cluster" % nic_mac,
8723
                                       errors.ECODE_NOTUNIQUE)
8724

    
8725
    # DISK processing
8726
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8727
      raise errors.OpPrereqError("Disk operations not supported for"
8728
                                 " diskless instances",
8729
                                 errors.ECODE_INVAL)
8730
    for disk_op, _ in self.op.disks:
8731
      if disk_op == constants.DDM_REMOVE:
8732
        if len(instance.disks) == 1:
8733
          raise errors.OpPrereqError("Cannot remove the last disk of"
8734
                                     " an instance", errors.ECODE_INVAL)
8735
        _CheckInstanceDown(self, instance, "cannot remove disks")
8736

    
8737
      if (disk_op == constants.DDM_ADD and
8738
          len(instance.disks) >= constants.MAX_DISKS):
8739
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8740
                                   " add more" % constants.MAX_DISKS,
8741
                                   errors.ECODE_STATE)
8742
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8743
        # an existing disk
8744
        if disk_op < 0 or disk_op >= len(instance.disks):
8745
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8746
                                     " are 0 to %d" %
8747
                                     (disk_op, len(instance.disks) - 1),
8748
                                     errors.ECODE_INVAL)
8749

    
8750
    return
8751

    
8752
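  # Disk template conversion helpers.  Plain -> DRBD (see the code below):
  # create the missing data/meta LVs, rename the original LVs into place as
  # DRBD data devices, create the DRBD devices on both nodes, update the
  # configuration and wait for the initial sync.  DRBD -> plain keeps the
  # data LVs and drops the DRBD layer together with the metadata volumes.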
  def _ConvertPlainToDrbd(self, feedback_fn):
8753
    """Converts an instance from plain to drbd.
8754

8755
    """
8756
    feedback_fn("Converting template to drbd")
8757
    instance = self.instance
8758
    pnode = instance.primary_node
8759
    snode = self.op.remote_node
8760

    
8761
    # create a fake disk info for _GenerateDiskTemplate
8762
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8763
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8764
                                      instance.name, pnode, [snode],
8765
                                      disk_info, None, None, 0)
8766
    info = _GetInstanceInfoText(instance)
8767
    feedback_fn("Creating additional volumes...")
8768
    # first, create the missing data and meta devices
8769
    for disk in new_disks:
8770
      # unfortunately this is... not too nice
8771
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8772
                            info, True)
8773
      for child in disk.children:
8774
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8775
    # at this stage, all new LVs have been created, we can rename the
8776
    # old ones
8777
    feedback_fn("Renaming original volumes...")
8778
    rename_list = [(o, n.children[0].logical_id)
8779
                   for (o, n) in zip(instance.disks, new_disks)]
8780
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8781
    result.Raise("Failed to rename original LVs")
8782

    
8783
    feedback_fn("Initializing DRBD devices...")
8784
    # all child devices are in place, we can now create the DRBD devices
8785
    for disk in new_disks:
8786
      for node in [pnode, snode]:
8787
        f_create = node == pnode
8788
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8789

    
8790
    # at this point, the instance has been modified
8791
    instance.disk_template = constants.DT_DRBD8
8792
    instance.disks = new_disks
8793
    self.cfg.Update(instance, feedback_fn)
8794

    
8795
    # disks are created, waiting for sync
8796
    disk_abort = not _WaitForSync(self, instance)
8797
    if disk_abort:
8798
      raise errors.OpExecError("There are some degraded disks for"
8799
                               " this instance, please cleanup manually")
8800

    
8801
  def _ConvertDrbdToPlain(self, feedback_fn):
8802
    """Converts an instance from drbd to plain.
8803

8804
    """
8805
    instance = self.instance
8806
    assert len(instance.secondary_nodes) == 1
8807
    pnode = instance.primary_node
8808
    snode = instance.secondary_nodes[0]
8809
    feedback_fn("Converting template to plain")
8810

    
8811
    old_disks = instance.disks
8812
    new_disks = [d.children[0] for d in old_disks]
8813
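    # children[0] of each DRBD disk is the data LV we keep; children[1] is the
    # metadata LV, removed further down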

    
8814
    # copy over size and mode
8815
    for parent, child in zip(old_disks, new_disks):
8816
      child.size = parent.size
8817
      child.mode = parent.mode
8818

    
8819
    # update instance structure
8820
    instance.disks = new_disks
8821
    instance.disk_template = constants.DT_PLAIN
8822
    self.cfg.Update(instance, feedback_fn)
8823

    
8824
    feedback_fn("Removing volumes on the secondary node...")
8825
    for disk in old_disks:
8826
      self.cfg.SetDiskID(disk, snode)
8827
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8828
      if msg:
8829
        self.LogWarning("Could not remove block device %s on node %s,"
8830
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8831

    
8832
    feedback_fn("Removing unneeded volumes on the primary node...")
8833
    for idx, disk in enumerate(old_disks):
8834
      meta = disk.children[1]
8835
      self.cfg.SetDiskID(meta, pnode)
8836
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8837
      if msg:
8838
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8839
                        " continuing anyway: %s", idx, pnode, msg)
8840

    
8841

    
8842
  def Exec(self, feedback_fn):
8843
    """Modifies an instance.
8844

8845
    All parameters take effect only at the next restart of the instance.
8846

8847
    """
8848
    # Process here the warnings from CheckPrereq, as we don't have a
8849
    # feedback_fn there.
8850
    for warn in self.warn:
8851
      feedback_fn("WARNING: %s" % warn)
8852

    
8853
    result = []
8854
    instance = self.instance
8855
    # disk changes
8856
    for disk_op, disk_dict in self.op.disks:
8857
      if disk_op == constants.DDM_REMOVE:
8858
        # remove the last disk
8859
        device = instance.disks.pop()
8860
        device_idx = len(instance.disks)
8861
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8862
          self.cfg.SetDiskID(disk, node)
8863
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8864
          if msg:
8865
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8866
                            " continuing anyway", device_idx, node, msg)
8867
        result.append(("disk/%d" % device_idx, "remove"))
8868
      elif disk_op == constants.DDM_ADD:
8869
        # add a new disk
8870
        if instance.disk_template == constants.DT_FILE:
8871
          file_driver, file_path = instance.disks[0].logical_id
8872
          file_path = os.path.dirname(file_path)
8873
        else:
8874
          file_driver = file_path = None
8875
        disk_idx_base = len(instance.disks)
8876
        new_disk = _GenerateDiskTemplate(self,
8877
                                         instance.disk_template,
8878
                                         instance.name, instance.primary_node,
8879
                                         instance.secondary_nodes,
8880
                                         [disk_dict],
8881
                                         file_path,
8882
                                         file_driver,
8883
                                         disk_idx_base)[0]
8884
        instance.disks.append(new_disk)
8885
        info = _GetInstanceInfoText(instance)
8886

    
8887
        logging.info("Creating volume %s for instance %s",
8888
                     new_disk.iv_name, instance.name)
8889
        # Note: this needs to be kept in sync with _CreateDisks
8890
        #HARDCODE
8891
        for node in instance.all_nodes:
8892
          f_create = node == instance.primary_node
8893
          try:
8894
            _CreateBlockDev(self, node, instance, new_disk,
8895
                            f_create, info, f_create)
8896
          except errors.OpExecError, err:
8897
            self.LogWarning("Failed to create volume %s (%s) on"
8898
                            " node %s: %s",
8899
                            new_disk.iv_name, new_disk, node, err)
8900
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8901
                       (new_disk.size, new_disk.mode)))
8902
      else:
8903
        # change a given disk
8904
        instance.disks[disk_op].mode = disk_dict['mode']
8905
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8906

    
8907
    if self.op.disk_template:
8908
      r_shut = _ShutdownInstanceDisks(self, instance)
8909
      if not r_shut:
8910
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
8911
                                 " proceed with disk template conversion")
8912
      mode = (instance.disk_template, self.op.disk_template)
8913
      try:
8914
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8915
      except:
8916
        self.cfg.ReleaseDRBDMinors(instance.name)
8917
        raise
8918
      result.append(("disk_template", self.op.disk_template))
8919

    
8920
    # NIC changes
8921
    for nic_op, nic_dict in self.op.nics:
8922
      if nic_op == constants.DDM_REMOVE:
8923
        # remove the last nic
8924
        del instance.nics[-1]
8925
        result.append(("nic.%d" % len(instance.nics), "remove"))
8926
      elif nic_op == constants.DDM_ADD:
8927
        # mac and bridge should be set by now
8928
        mac = nic_dict['mac']
8929
        ip = nic_dict.get('ip', None)
8930
        nicparams = self.nic_pinst[constants.DDM_ADD]
8931
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8932
        instance.nics.append(new_nic)
8933
        result.append(("nic.%d" % (len(instance.nics) - 1),
8934
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8935
                       (new_nic.mac, new_nic.ip,
8936
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8937
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8938
                       )))
8939
      else:
8940
        for key in 'mac', 'ip':
8941
          if key in nic_dict:
8942
            setattr(instance.nics[nic_op], key, nic_dict[key])
8943
        if nic_op in self.nic_pinst:
8944
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8945
        for key, val in nic_dict.iteritems():
8946
          result.append(("nic.%s/%d" % (key, nic_op), val))
8947

    
8948
    # hvparams changes
8949
    if self.op.hvparams:
8950
      instance.hvparams = self.hv_inst
8951
      for key, val in self.op.hvparams.iteritems():
8952
        result.append(("hv/%s" % key, val))
8953

    
8954
    # beparams changes
8955
    if self.op.beparams:
8956
      instance.beparams = self.be_inst
8957
      for key, val in self.op.beparams.iteritems():
8958
        result.append(("be/%s" % key, val))
8959

    
8960
    # OS change
8961
    if self.op.os_name:
8962
      instance.os = self.op.os_name
8963

    
8964
    # osparams changes
8965
    if self.op.osparams:
8966
      instance.osparams = self.os_inst
8967
      for key, val in self.op.osparams.iteritems():
8968
        result.append(("os/%s" % key, val))
8969

    
8970
    self.cfg.Update(instance, feedback_fn)
8971

    
8972
    return result
8973

    
8974
  _DISK_CONVERSIONS = {
8975
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8976
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8977
    }
8978
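  # Exec() dispatches through this map on the
  # (current_template, requested_template) pair; combinations not listed here
  # are rejected in CheckPrereq as unsupported conversions.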

    
8979

    
8980
class LUQueryExports(NoHooksLU):
8981
  """Query the exports list
8982

8983
  """
8984
  _OP_REQP = ['nodes']
8985
  REQ_BGL = False
8986

    
8987
  def ExpandNames(self):
8988
    self.needed_locks = {}
8989
    self.share_locks[locking.LEVEL_NODE] = 1
8990
    if not self.op.nodes:
8991
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8992
    else:
8993
      self.needed_locks[locking.LEVEL_NODE] = \
8994
        _GetWantedNodes(self, self.op.nodes)
8995

    
8996
  def CheckPrereq(self):
8997
    """Check prerequisites.
8998

8999
    """
9000
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9001

    
9002
  def Exec(self, feedback_fn):
9003
    """Compute the list of all the exported system images.
9004

9005
    @rtype: dict
9006
    @return: a dictionary with the structure node->(export-list)
9007
        where export-list is a list of the instances exported on
9008
        that node.
9009

9010
    """
9011
    rpcresult = self.rpc.call_export_list(self.nodes)
9012
    result = {}
9013
    for node in rpcresult:
9014
      if rpcresult[node].fail_msg:
9015
        result[node] = False
9016
      else:
9017
        result[node] = rpcresult[node].payload
9018

    
9019
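    # illustrative result shape (node names are placeholders):
    #   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}
    # where False marks a node whose export list could not be retrieved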
    return result
9020

    
9021

    
9022
class LUPrepareExport(NoHooksLU):
9023
  """Prepares an instance for an export and returns useful information.
9024

9025
  """
9026
  _OP_REQP = ["instance_name", "mode"]
9027
  REQ_BGL = False
9028

    
9029
  def CheckArguments(self):
9030
    """Check the arguments.
9031

9032
    """
9033
    if self.op.mode not in constants.EXPORT_MODES:
9034
      raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9035
                                 errors.ECODE_INVAL)
9036

    
9037
  def ExpandNames(self):
9038
    self._ExpandAndLockInstance()
9039

    
9040
  def CheckPrereq(self):
9041
    """Check prerequisites.
9042

9043
    """
9044
    instance_name = self.op.instance_name
9045

    
9046
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9047
    assert self.instance is not None, \
9048
          "Cannot retrieve locked instance %s" % self.op.instance_name
9049
    _CheckNodeOnline(self, self.instance.primary_node)
9050

    
9051
    self._cds = _GetClusterDomainSecret()
9052

    
9053
  def Exec(self, feedback_fn):
9054
    """Prepares an instance for an export.
9055

9056
    """
9057
    instance = self.instance
9058

    
9059
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9060
      salt = utils.GenerateSecret(8)
9061

    
9062
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9063
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9064
                                              constants.RIE_CERT_VALIDITY)
9065
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9066

    
9067
      (name, cert_pem) = result.payload
9068

    
9069
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9070
                                             cert_pem)
9071

    
9072
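      # the three values below are all bound to the cluster domain secret
      # (self._cds): the handshake, the HMAC over the X509 key name and the
      # signed CA certificate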
      return {
9073
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9074
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9075
                          salt),
9076
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9077
        }
9078

    
9079
    return None
9080

    
9081

    
9082
class LUExportInstance(LogicalUnit):
9083
  """Export an instance to an image in the cluster.
9084

9085
  """
9086
  HPATH = "instance-export"
9087
  HTYPE = constants.HTYPE_INSTANCE
9088
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
9089
  REQ_BGL = False
9090

    
9091
  def CheckArguments(self):
9092
    """Check the arguments.
9093

9094
    """
9095
    _CheckBooleanOpField(self.op, "remove_instance")
9096
    _CheckBooleanOpField(self.op, "ignore_remove_failures")
9097

    
9098
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9099
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
9100
    self.remove_instance = getattr(self.op, "remove_instance", False)
9101
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9102
                                          False)
9103
    self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9104
    self.x509_key_name = getattr(self.op, "x509_key_name", None)
9105
    self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9106

    
9107
    if self.remove_instance and not self.op.shutdown:
9108
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first", errors.ECODE_INVAL)
9110

    
9111
    if self.export_mode not in constants.EXPORT_MODES:
9112
      raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9113
                                 errors.ECODE_INVAL)
9114

    
9115
    if self.export_mode == constants.EXPORT_MODE_REMOTE:
9116
      if not self.x509_key_name:
9117
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9118
                                   errors.ECODE_INVAL)
9119

    
9120
      if not self.dest_x509_ca_pem:
9121
        raise errors.OpPrereqError("Missing destination X509 CA",
9122
                                   errors.ECODE_INVAL)
9123

    
9124
  def ExpandNames(self):
9125
    self._ExpandAndLockInstance()
9126

    
9127
    # Lock all nodes for local exports
9128
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9129
      # FIXME: lock only instance primary and destination node
9130
      #
9131
      # Sad but true, for now we have to lock all nodes, as we don't know where
9132
      # the previous export might be, and in this LU we search for it and
9133
      # remove it from its current node. In the future we could fix this by:
9134
      #  - making a tasklet to search (share-lock all), then create the new one,
9135
      #    then one to remove, after
9136
      #  - removing the removal operation altogether
9137
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9138

    
9139
  def DeclareLocks(self, level):
9140
    """Last minute lock declaration."""
9141
    # All nodes are locked anyway, so nothing to do here.
9142

    
9143
  def BuildHooksEnv(self):
9144
    """Build hooks env.
9145

9146
    This will run on the master, primary node and target node.
9147

9148
    """
9149
    env = {
9150
      "EXPORT_MODE": self.export_mode,
9151
      "EXPORT_NODE": self.op.target_node,
9152
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9153
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9154
      # TODO: Generic function for boolean env variables
9155
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9156
      }
9157

    
9158
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9159

    
9160
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9161

    
9162
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9163
      nl.append(self.op.target_node)
9164

    
9165
    return env, nl, nl
9166

    
9167
  def CheckPrereq(self):
9168
    """Check prerequisites.
9169

9170
    This checks that the instance and node names are valid.
9171

9172
    """
9173
    instance_name = self.op.instance_name
9174

    
9175
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9176
    assert self.instance is not None, \
9177
          "Cannot retrieve locked instance %s" % self.op.instance_name
9178
    _CheckNodeOnline(self, self.instance.primary_node)
9179

    
9180
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9181
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9182
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9183
      assert self.dst_node is not None
9184

    
9185
      _CheckNodeOnline(self, self.dst_node.name)
9186
      _CheckNodeNotDrained(self, self.dst_node.name)
9187

    
9188
      self._cds = None
9189
      self.dest_disk_info = None
9190
      self.dest_x509_ca = None
9191

    
9192
    elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9193
      self.dst_node = None
9194

    
9195
      if len(self.op.target_node) != len(self.instance.disks):
9196
        raise errors.OpPrereqError(("Received destination information for %s"
9197
                                    " disks, but instance %s has %s disks") %
9198
                                   (len(self.op.target_node), instance_name,
9199
                                    len(self.instance.disks)),
9200
                                   errors.ECODE_INVAL)
9201

    
9202
      cds = _GetClusterDomainSecret()
9203

    
9204
      # Check X509 key name
9205
      try:
9206
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9207
      except (TypeError, ValueError), err:
9208
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9209

    
9210
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9211
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9212
                                   errors.ECODE_INVAL)
9213

    
9214
      # Load and verify CA
9215
      try:
9216
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9217
      except OpenSSL.crypto.Error, err:
9218
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9219
                                   (err, ), errors.ECODE_INVAL)
9220

    
9221
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9222
      if errcode is not None:
9223
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9224
                                   errors.ECODE_INVAL)
9225

    
9226
      self.dest_x509_ca = cert
9227

    
9228
      # Verify target information
9229
      disk_info = []
9230
      for idx, disk_data in enumerate(self.op.target_node):
9231
        try:
9232
          (host, port, magic) = \
9233
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9234
        except errors.GenericError, err:
9235
          raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9236
                                     errors.ECODE_INVAL)
9237

    
9238
        disk_info.append((host, port, magic))
9239

    
9240
      assert len(disk_info) == len(self.op.target_node)
9241
      self.dest_disk_info = disk_info
9242

    
9243
    else:
9244
      raise errors.ProgrammerError("Unhandled export mode %r" %
9245
                                   self.export_mode)
9246

    
9247
    # instance disk type verification
9248
    # TODO: Implement export support for file-based disks
9249
    for disk in self.instance.disks:
9250
      if disk.dev_type == constants.LD_FILE:
9251
        raise errors.OpPrereqError("Export not supported for instances with"
9252
                                   " file-based disks", errors.ECODE_INVAL)
9253

    
9254
  def _CleanupExports(self, feedback_fn):
9255
    """Removes exports of current instance from all other nodes.
9256

9257
    If an instance in a cluster with nodes A..D was exported to node C, its
9258
    exports will be removed from the nodes A, B and D.
9259

9260
    """
9261
    assert self.export_mode != constants.EXPORT_MODE_REMOTE
9262

    
9263
    nodelist = self.cfg.GetNodeList()
9264
    nodelist.remove(self.dst_node.name)
9265

    
9266
    # on one-node clusters nodelist will be empty after the removal
9267
    # if we proceed the backup would be removed because OpQueryExports
9268
    # substitutes an empty list with the full cluster node list.
9269
    iname = self.instance.name
9270
    if nodelist:
9271
      feedback_fn("Removing old exports for instance %s" % iname)
9272
      exportlist = self.rpc.call_export_list(nodelist)
9273
      for node in exportlist:
9274
        if exportlist[node].fail_msg:
9275
          continue
9276
        if iname in exportlist[node].payload:
9277
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9278
          if msg:
9279
            self.LogWarning("Could not remove older export for instance %s"
9280
                            " on node %s: %s", iname, node, msg)
9281

    
9282
  def Exec(self, feedback_fn):
9283
    """Export an instance to an image in the cluster.
9284

9285
    """
9286
    assert self.export_mode in constants.EXPORT_MODES
9287

    
9288
    instance = self.instance
9289
    src_node = instance.primary_node
9290

    
9291
    if self.op.shutdown:
9292
      # shutdown the instance, but not the disks
9293
      feedback_fn("Shutting down instance %s" % instance.name)
9294
      result = self.rpc.call_instance_shutdown(src_node, instance,
9295
                                               self.shutdown_timeout)
9296
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9297
      result.Raise("Could not shutdown instance %s on"
9298
                   " node %s" % (instance.name, src_node))
9299

    
9300
    # set the disks ID correctly since call_instance_start needs the
9301
    # correct drbd minor to create the symlinks
9302
    for disk in instance.disks:
9303
      self.cfg.SetDiskID(disk, src_node)
9304

    
9305
    activate_disks = (not instance.admin_up)
9306

    
9307
    if activate_disks:
9308
      # Activate the instance disks if we're exporting a stopped instance
9309
      feedback_fn("Activating disks for %s" % instance.name)
9310
      _StartInstanceDisks(self, instance, None)
9311

    
9312
    try:
9313
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9314
                                                     instance)
9315

    
9316
      helper.CreateSnapshots()
9317
      try:
9318
        if (self.op.shutdown and instance.admin_up and
9319
            not self.remove_instance):
9320
          assert not activate_disks
9321
          feedback_fn("Starting instance %s" % instance.name)
9322
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9323
          msg = result.fail_msg
9324
          if msg:
9325
            feedback_fn("Failed to start instance: %s" % msg)
9326
            _ShutdownInstanceDisks(self, instance)
9327
            raise errors.OpExecError("Could not start instance: %s" % msg)
9328

    
9329
        if self.export_mode == constants.EXPORT_MODE_LOCAL:
9330
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9331
        elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9332
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9333
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9334

    
9335
          (key_name, _, _) = self.x509_key_name
9336

    
9337
          dest_ca_pem = \
9338
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9339
                                            self.dest_x509_ca)
9340

    
9341
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9342
                                                     key_name, dest_ca_pem,
9343
                                                     timeouts)
9344
      finally:
9345
        helper.Cleanup()
9346

    
9347
      # Check for backwards compatibility
9348
      assert len(dresults) == len(instance.disks)
9349
      assert compat.all(isinstance(i, bool) for i in dresults), \
9350
             "Not all results are boolean: %r" % dresults
9351

    
9352
    finally:
9353
      if activate_disks:
9354
        feedback_fn("Deactivating disks for %s" % instance.name)
9355
        _ShutdownInstanceDisks(self, instance)
9356

    
9357
    # Remove instance if requested
9358
    if self.remove_instance:
9359
      if not (compat.all(dresults) and fin_resu):
9360
        feedback_fn("Not removing instance %s as parts of the export failed" %
9361
                    instance.name)
9362
      else:
9363
        feedback_fn("Removing instance %s" % instance.name)
9364
        _RemoveInstance(self, feedback_fn, instance,
9365
                        self.ignore_remove_failures)
9366

    
9367
    if self.export_mode == constants.EXPORT_MODE_LOCAL:
9368
      self._CleanupExports(feedback_fn)
9369

    
9370
    return fin_resu, dresults
9371

    
9372

    
9373
class LURemoveExport(NoHooksLU):
9374
  """Remove exports related to the named instance.
9375

9376
  """
9377
  _OP_REQP = ["instance_name"]
9378
  REQ_BGL = False
9379

    
9380
  def ExpandNames(self):
9381
    self.needed_locks = {}
9382
    # We need all nodes to be locked in order for RemoveExport to work, but we
9383
    # don't need to lock the instance itself, as nothing will happen to it (and
9384
    # we can remove exports also for a removed instance)
9385
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9386

    
9387
  def CheckPrereq(self):
9388
    """Check prerequisites.

    """
    pass
9391

    
9392
  def Exec(self, feedback_fn):
9393
    """Remove any export.
9394

9395
    """
9396
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9397
    # If the instance was not found we'll try with the name that was passed in.
9398
    # This will only work if it was an FQDN, though.
9399
    fqdn_warn = False
9400
    if not instance_name:
9401
      fqdn_warn = True
9402
      instance_name = self.op.instance_name
9403

    
9404
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9405
    exportlist = self.rpc.call_export_list(locked_nodes)
9406
    found = False
9407
    for node in exportlist:
9408
      msg = exportlist[node].fail_msg
9409
      if msg:
9410
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9411
        continue
9412
      if instance_name in exportlist[node].payload:
9413
        found = True
9414
        result = self.rpc.call_export_remove(node, instance_name)
9415
        msg = result.fail_msg
9416
        if msg:
9417
          logging.error("Could not remove export for instance %s"
9418
                        " on node %s: %s", instance_name, node, msg)
9419

    
9420
    if fqdn_warn and not found:
9421
      feedback_fn("Export not found. If trying to remove an export belonging"
9422
                  " to a deleted instance please use its Fully Qualified"
9423
                  " Domain Name.")
9424

    
9425

    
9426
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9427
  """Generic tags LU.
9428

9429
  This is an abstract class which is the parent of all the other tags LUs.
9430

9431
  """
9432

    
9433
  def ExpandNames(self):
9434
    self.needed_locks = {}
9435
    if self.op.kind == constants.TAG_NODE:
9436
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9437
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9438
    elif self.op.kind == constants.TAG_INSTANCE:
9439
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9440
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9441

    
9442
  def CheckPrereq(self):
9443
    """Check prerequisites.
9444

9445
    """
9446
    if self.op.kind == constants.TAG_CLUSTER:
9447
      self.target = self.cfg.GetClusterInfo()
9448
    elif self.op.kind == constants.TAG_NODE:
9449
      self.target = self.cfg.GetNodeInfo(self.op.name)
9450
    elif self.op.kind == constants.TAG_INSTANCE:
9451
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9452
    else:
9453
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9454
                                 str(self.op.kind), errors.ECODE_INVAL)
9455

    
9456

    
9457
class LUGetTags(TagsLU):
9458
  """Returns the tags of a given object.
9459

9460
  """
9461
  _OP_REQP = ["kind", "name"]
9462
  REQ_BGL = False
9463

    
9464
  def Exec(self, feedback_fn):
9465
    """Returns the tag list.
9466

9467
    """
9468
    return list(self.target.GetTags())
9469

    
9470

    
9471
class LUSearchTags(NoHooksLU):
9472
  """Searches the tags for a given pattern.
9473

9474
  """
9475
  _OP_REQP = ["pattern"]
9476
  REQ_BGL = False
9477

    
9478
  def ExpandNames(self):
9479
    self.needed_locks = {}
9480

    
9481
  def CheckPrereq(self):
9482
    """Check prerequisites.
9483

9484
    This checks the pattern passed for validity by compiling it.
9485

9486
    """
9487
    try:
9488
      self.re = re.compile(self.op.pattern)
9489
    except re.error, err:
9490
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9491
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9492

    
9493
  def Exec(self, feedback_fn):
9494
    """Returns the tag list.
9495

9496
    """
9497
    cfg = self.cfg
9498
    tgts = [("/cluster", cfg.GetClusterInfo())]
9499
    ilist = cfg.GetAllInstancesInfo().values()
9500
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9501
    nlist = cfg.GetAllNodesInfo().values()
9502
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9503
    results = []
9504
    for path, target in tgts:
9505
      for tag in target.GetTags():
9506
        if self.re.search(tag):
9507
          results.append((path, tag))
9508
    return results
9509

    
9510

    
9511
class LUAddTags(TagsLU):
9512
  """Sets a tag on a given object.
9513

9514
  """
9515
  _OP_REQP = ["kind", "name", "tags"]
9516
  REQ_BGL = False
9517

    
9518
  def CheckPrereq(self):
9519
    """Check prerequisites.
9520

9521
    This checks the type and length of the tag name and value.
9522

9523
    """
9524
    TagsLU.CheckPrereq(self)
9525
    for tag in self.op.tags:
9526
      objects.TaggableObject.ValidateTag(tag)
9527

    
9528
  def Exec(self, feedback_fn):
9529
    """Sets the tag.
9530

9531
    """
9532
    try:
9533
      for tag in self.op.tags:
9534
        self.target.AddTag(tag)
9535
    except errors.TagError, err:
9536
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9537
    self.cfg.Update(self.target, feedback_fn)
9538

    
9539

    
9540
class LUDelTags(TagsLU):
9541
  """Delete a list of tags from a given object.
9542

9543
  """
9544
  _OP_REQP = ["kind", "name", "tags"]
9545
  REQ_BGL = False
9546

    
9547
  def CheckPrereq(self):
9548
    """Check prerequisites.
9549

9550
    This checks that we have the given tag.
9551

9552
    """
9553
    TagsLU.CheckPrereq(self)
9554
    for tag in self.op.tags:
9555
      objects.TaggableObject.ValidateTag(tag)
9556
    del_tags = frozenset(self.op.tags)
9557
    cur_tags = self.target.GetTags()
9558
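    # all requested tags must currently be present; "<=" is the frozenset
    # subset test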
    if not del_tags <= cur_tags:
9559
      diff_tags = del_tags - cur_tags
9560
      diff_names = ["'%s'" % tag for tag in diff_tags]
9561
      diff_names.sort()
9562
      raise errors.OpPrereqError("Tag(s) %s not found" %
9563
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    # TODO: convert to the type system
    self.op.repeat = getattr(self.op, "repeat", 0)
    if self.op.repeat < 0:
      raise errors.OpPrereqError("Repetition count cannot be negative")

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
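
    # Illustrative note (not part of the original code): repeat=0 still runs
    # the delay exactly once, while repeat=N runs it N times and logs the
    # iterations zero-based, e.g. for repeat=3:
    #   Test delay iteration 0/2
    #   Test delay iteration 1/2
    #   Test delay iteration 2/2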


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
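
  # Illustrative sketch (not part of the original module): constructing an
  # IAllocator requires exactly the keyword arguments of the chosen mode's
  # keyset, e.g. for a relocation request (the instance/node names below are
  # made up):
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #
  # Missing or extra keyword arguments raise errors.ProgrammerError, and the
  # constructor immediately builds the input text via _BuildInputData.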

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
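
    # Illustrative note (not part of the original code): at this point
    # self.in_data has roughly the following shape (values abbreviated,
    # node/instance names made up):
    #
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...],
    #    "nodes": {"node1.example.com": {"total_memory": ..., ...}, ...},
    #    "instances": {"inst1.example.com": {"memory": ..., ...}, ...}}
    #
    # The per-mode request is added later by _BuildInputData.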

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
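
    # Illustrative note (not part of the original code): for a network-
    # mirrored disk template the request asks for two nodes, otherwise one,
    # so the dict returned above could look roughly like (made-up values):
    #
    #   {"name": "inst1.example.com", "disk_template": "drbd",
    #    "memory": 512, "vcpus": 1, "disks": [{"size": 1024, "mode": "w"}],
    #    "disk_space_total": ..., "nics": [...], "os": "...", "tags": [],
    #    "required_nodes": 2}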

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
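
    # Illustrative note (not part of the original code): self.in_text is the
    # serialized form of self.in_data, with the per-mode request embedded
    # under the "request" key and tagged with its "type", e.g. roughly:
    #
    #   in_data["request"] == {"type": <mode constant>, "name": ..., ...}
    #
    # This is the text that gets handed to the external allocator script.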

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
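
    # Illustrative note (not part of the original code): a minimal reply that
    # the validation above accepts would look like (node names made up)
    #
    #   {"success": true, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}
    #
    # and a legacy reply using the old "nodes" key instead of "result" is
    # rewritten transparently before the checks run.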


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
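
    # Illustrative note (not part of the original code): with direction
    # constants.IALLOCATOR_DIR_IN the LU only returns the generated input
    # text (no allocator is executed), while IALLOCATOR_DIR_OUT runs the
    # named allocator script and returns its raw, unvalidated output.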