lib/cmdlib.py @ 3e512563
#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


# need to define these here before the actual LUs
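# The two helpers just below are meant to be used as *callable* defaults in an
# LU's _OP_DEFS list: LogicalUnit.__init__ invokes callable defaults, so every
# opcode gets a fresh, unshared list or dict rather than one mutable object
# reused across opcodes.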

def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_DEFS: a list of opcode attributes and the default values
      they should get if not already existing

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  _OP_DEFS = []
  REQ_BGL = True
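  # How the class attributes above are used (see __init__ below): every name
  # in _OP_REQP must already be set on the opcode, otherwise OpPrereqError is
  # raised; each (name, default) pair in _OP_DEFS is applied only when the
  # attribute is missing, with callable defaults being invoked to build the
  # value. Illustrative values only, not taken from a real LU:
  #   _OP_REQP = ["instance_name"]
  #   _OP_DEFS = [("force", False), ("nics", _EmptyList)]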

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for aname, aval in self._OP_DEFS:
      if not hasattr(self.op, aname):
        if callable(aval):
          dval = aval()
        else:
          dval = aval
        setattr(self.op, aname, dval)

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes, an empty list (and not None) should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

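    # constants.LOCKS_REPLACE overwrites the node-level lock list with the
    # nodes computed above, while constants.LOCKS_APPEND extends whatever was
    # already declared for that level.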
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"

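# Tasklets are wired into an LU by assigning a list to self.tasklets (for
# example in ExpandNames, as its docstring above describes); the default
# LogicalUnit.CheckPrereq and LogicalUnit.Exec then iterate over that list
# instead of requiring the LU to implement those methods itself.
# Illustrative only -- MyTasklet is not a class defined in this module:
#   self.tasklets = [MyTasklet(self)]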
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted

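# Worked example for the helper below (values are illustrative):
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT, "c": 3})
#   returns {"a": 1, "c": 3}: "b" is reset (removed) because of VALUE_DEFAULT,
#   "c" is added, and "a" is left untouched.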
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _CheckExportMode(mode):
  """Ensures that a given export mode is valid.

  @param mode: the export mode to check
  @raise errors.OpPrereqError: when the export mode is not valid

  """
  if mode not in constants.EXPORT_MODES:
    raise errors.OpPrereqError("Invalid export mode %r" % mode,
                               errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")

728
                          memory, vcpus, nics, disk_template, disks,
729
                          bep, hvp, hypervisor_name):
730
  """Builds instance related env variables for hooks
731

732
  This builds the hook environment from individual variables.
733

734
  @type name: string
735
  @param name: the name of the instance
736
  @type primary_node: string
737
  @param primary_node: the name of the instance's primary node
738
  @type secondary_nodes: list
739
  @param secondary_nodes: list of secondary nodes as strings
740
  @type os_type: string
741
  @param os_type: the name of the instance's OS
742
  @type status: boolean
743
  @param status: the should_run status of the instance
744
  @type memory: string
745
  @param memory: the memory size of the instance
746
  @type vcpus: string
747
  @param vcpus: the count of VCPUs the instance has
748
  @type nics: list
749
  @param nics: list of tuples (ip, mac, mode, link) representing
750
      the NICs the instance has
751
  @type disk_template: string
752
  @param disk_template: the disk template of the instance
753
  @type disks: list
754
  @param disks: the list of (size, mode) pairs
755
  @type bep: dict
756
  @param bep: the backend parameters for the instance
757
  @type hvp: dict
758
  @param hvp: the hypervisor parameters for the instance
759
  @type hypervisor_name: string
760
  @param hypervisor_name: the hypervisor for the instance
761
  @rtype: dict
762
  @return: the hook environment for this instance
763

764
  """
765
  if status:
766
    str_status = "up"
767
  else:
768
    str_status = "down"
769
  env = {
770
    "OP_TARGET": name,
771
    "INSTANCE_NAME": name,
772
    "INSTANCE_PRIMARY": primary_node,
773
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
774
    "INSTANCE_OS_TYPE": os_type,
775
    "INSTANCE_STATUS": str_status,
776
    "INSTANCE_MEMORY": memory,
777
    "INSTANCE_VCPUS": vcpus,
778
    "INSTANCE_DISK_TEMPLATE": disk_template,
779
    "INSTANCE_HYPERVISOR": hypervisor_name,
780
  }
781

    
782
  if nics:
783
    nic_count = len(nics)
784
    for idx, (ip, mac, mode, link) in enumerate(nics):
785
      if ip is None:
786
        ip = ""
787
      env["INSTANCE_NIC%d_IP" % idx] = ip
788
      env["INSTANCE_NIC%d_MAC" % idx] = mac
789
      env["INSTANCE_NIC%d_MODE" % idx] = mode
790
      env["INSTANCE_NIC%d_LINK" % idx] = link
791
      if mode == constants.NIC_MODE_BRIDGED:
792
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
793
  else:
794
    nic_count = 0
795

    
796
  env["INSTANCE_NIC_COUNT"] = nic_count
797

    
798
  if disks:
799
    disk_count = len(disks)
800
    for idx, (size, mode) in enumerate(disks):
801
      env["INSTANCE_DISK%d_SIZE" % idx] = size
802
      env["INSTANCE_DISK%d_MODE" % idx] = mode
803
  else:
804
    disk_count = 0
805

    
806
  env["INSTANCE_DISK_COUNT"] = disk_count
807

    
808
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
809
    for key, value in source.items():
810
      env["INSTANCE_%s_%s" % (kind, key)] = value
811

    
812
  return env
813

    
814

    
815
def _NICListToTuple(lu, nics):
816
  """Build a list of nic information tuples.
817

818
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
819
  value in LUQueryInstanceData.
820

821
  @type lu:  L{LogicalUnit}
822
  @param lu: the logical unit on whose behalf we execute
823
  @type nics: list of L{objects.NIC}
824
  @param nics: list of nics to convert to hooks tuples
825

826
  """
827
  hooks_nics = []
828
  cluster = lu.cfg.GetClusterInfo()
829
  for nic in nics:
830
    ip = nic.ip
831
    mac = nic.mac
832
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
833
    mode = filled_params[constants.NIC_MODE]
834
    link = filled_params[constants.NIC_LINK]
835
    hooks_nics.append((ip, mac, mode, link))
836
  return hooks_nics
837

    
838

    
839
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
840
  """Builds instance related env variables for hooks from an object.
841

842
  @type lu: L{LogicalUnit}
843
  @param lu: the logical unit on whose behalf we execute
844
  @type instance: L{objects.Instance}
845
  @param instance: the instance for which we should build the
846
      environment
847
  @type override: dict
848
  @param override: dictionary with key/values that will override
849
      our values
850
  @rtype: dict
851
  @return: the hook environment dictionary
852

853
  """
854
  cluster = lu.cfg.GetClusterInfo()
855
  bep = cluster.FillBE(instance)
856
  hvp = cluster.FillHV(instance)
857
  args = {
858
    'name': instance.name,
859
    'primary_node': instance.primary_node,
860
    'secondary_nodes': instance.secondary_nodes,
861
    'os_type': instance.os,
862
    'status': instance.admin_up,
863
    'memory': bep[constants.BE_MEMORY],
864
    'vcpus': bep[constants.BE_VCPUS],
865
    'nics': _NICListToTuple(lu, instance.nics),
866
    'disk_template': instance.disk_template,
867
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
868
    'bep': bep,
869
    'hvp': hvp,
870
    'hypervisor_name': instance.hypervisor,
871
  }
872
  if override:
873
    args.update(override)
874
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
875

    
876

    
877
def _AdjustCandidatePool(lu, exceptions):
878
  """Adjust the candidate pool after node operations.
879

880
  """
881
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
882
  if mod_list:
883
    lu.LogInfo("Promoted nodes to master candidate role: %s",
884
               utils.CommaJoin(node.name for node in mod_list))
885
    for name in mod_list:
886
      lu.context.ReaddNode(name)
887
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
888
  if mc_now > mc_max:
889
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
890
               (mc_now, mc_max))
891

    
892

    
893
def _DecideSelfPromotion(lu, exceptions=None):
894
  """Decide whether I should promote myself as a master candidate.
895

896
  """
897
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
898
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
899
  # the new node will increase mc_max with one, so:
900
  mc_should = min(mc_should + 1, cp_size)
901
  return mc_now < mc_should
902

    
903

    
904
def _CheckNicsBridgesExist(lu, target_nics, target_node):
905
  """Check that the brigdes needed by a list of nics exist.
906

907
  """
908
  cluster = lu.cfg.GetClusterInfo()
909
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
910
  brlist = [params[constants.NIC_LINK] for params in paramslist
911
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
912
  if brlist:
913
    result = lu.rpc.call_bridges_exist(target_node, brlist)
914
    result.Raise("Error checking bridges on destination node '%s'" %
915
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
916

    
917

    
918
def _CheckInstanceBridgesExist(lu, instance, node=None):
919
  """Check that the brigdes needed by an instance exist.
920

921
  """
922
  if node is None:
923
    node = instance.primary_node
924
  _CheckNicsBridgesExist(lu, instance.nics, node)
925

    
926

    
927
def _CheckOSVariant(os_obj, name):
928
  """Check whether an OS name conforms to the os variants specification.
929

930
  @type os_obj: L{objects.OS}
931
  @param os_obj: OS object to check
932
  @type name: string
933
  @param name: OS name passed by the user, to check for validity
934

935
  """
936
  if not os_obj.supported_variants:
937
    return
938
  try:
939
    variant = name.split("+", 1)[1]
940
  except IndexError:
941
    raise errors.OpPrereqError("OS name must include a variant",
942
                               errors.ECODE_INVAL)
943

    
944
  if variant not in os_obj.supported_variants:
945
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
946

    
947

    
948
def _GetNodeInstancesInner(cfg, fn):
949
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
950

    
951

    
952
def _GetNodeInstances(cfg, node_name):
953
  """Returns a list of all primary and secondary instances on a node.
954

955
  """
956

    
957
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
958

    
959

    
960
def _GetNodePrimaryInstances(cfg, node_name):
961
  """Returns primary instances on a node.
962

963
  """
964
  return _GetNodeInstancesInner(cfg,
965
                                lambda inst: node_name == inst.primary_node)
966

    
967

    
968
def _GetNodeSecondaryInstances(cfg, node_name):
969
  """Returns secondary instances on a node.
970

971
  """
972
  return _GetNodeInstancesInner(cfg,
973
                                lambda inst: node_name in inst.secondary_nodes)
974

    
975

    
976
def _GetStorageTypeArgs(cfg, storage_type):
977
  """Returns the arguments for a storage type.
978

979
  """
980
  # Special case for file storage
981
  if storage_type == constants.ST_FILE:
982
    # storage.FileStorage wants a list of storage directories
983
    return [[cfg.GetFileStorageDir()]]
984

    
985
  return []
986

    
987

    
988
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
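  # Returns the indexes of the instance's disks whose mirror status on
  # node_name reports a faulty local disk (constants.LDS_FAULTY); an RPC
  # failure itself is surfaced through result.Raise() below.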
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
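  # Each E* constant above is an (item-type, code) tuple; _Error()/_ErrorIf()
  # combine it with an item name and a message, e.g. (illustrative call only):
  #   _ErrorIf(test, self.ENODERPC, node, "unable to verify node: %s", reason)
  # Passing the keyword 'code' (ETYPE_FIELD) set to ETYPE_WARNING downgrades
  # the entry to a warning, as done further down in this class.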

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)


    test = nresult.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

1347
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1348
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1349
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1350
    else:
1351
      ntime_diff = None
1352

    
1353
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1354
             "Node time diverges by at least %s from master node time",
1355
             ntime_diff)
1356

    
1357
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1358
    """Check the node time.
1359

1360
    @type ninfo: L{objects.Node}
1361
    @param ninfo: the node to check
1362
    @param nresult: the remote results for the node
1363
    @param vg_name: the configured VG name
1364

1365
    """
1366
    if vg_name is None:
1367
      return
1368

    
1369
    node = ninfo.name
1370
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1371

    
1372
    # checks vg existence and size > 20G
1373
    vglist = nresult.get(constants.NV_VGLIST, None)
1374
    test = not vglist
1375
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1376
    if not test:
1377
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1378
                                            constants.MIN_VG_SIZE)
1379
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1380

    
1381
    # check pv names
1382
    pvlist = nresult.get(constants.NV_PVLIST, None)
1383
    test = pvlist is None
1384
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1385
    if not test:
1386
      # check that ':' is not present in PV names, since it's a
1387
      # special character for lvcreate (denotes the range of PEs to
1388
      # use on the PV)
1389
      for _, pvname, owner_vg in pvlist:
1390
        test = ":" in pvname
1391
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1392
                 " '%s' of VG '%s'", pvname, owner_vg)
1393

    
1394
  def _VerifyNodeNetwork(self, ninfo, nresult):
1395
    """Check the node time.
1396

1397
    @type ninfo: L{objects.Node}
1398
    @param ninfo: the node to check
1399
    @param nresult: the remote results for the node
1400

1401
    """
1402
    node = ninfo.name
1403
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1404

    
1405
    test = constants.NV_NODELIST not in nresult
1406
    _ErrorIf(test, self.ENODESSH, node,
1407
             "node hasn't returned node ssh connectivity data")
1408
    if not test:
1409
      if nresult[constants.NV_NODELIST]:
1410
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1411
          _ErrorIf(True, self.ENODESSH, node,
1412
                   "ssh communication with node '%s': %s", a_node, a_msg)
1413

    
1414
    test = constants.NV_NODENETTEST not in nresult
1415
    _ErrorIf(test, self.ENODENET, node,
1416
             "node hasn't returned node tcp connectivity data")
1417
    if not test:
1418
      if nresult[constants.NV_NODENETTEST]:
1419
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1420
        for anode in nlist:
1421
          _ErrorIf(True, self.ENODENET, node,
1422
                   "tcp communication with node '%s': %s",
1423
                   anode, nresult[constants.NV_NODENETTEST][anode])
1424

    
1425
    test = constants.NV_MASTERIP not in nresult
1426
    _ErrorIf(test, self.ENODENET, node,
1427
             "node hasn't returned node master IP reachability data")
1428
    if not test:
1429
      if not nresult[constants.NV_MASTERIP]:
1430
        if node == self.master_node:
1431
          msg = "the master node cannot reach the master IP (not configured?)"
1432
        else:
1433
          msg = "cannot reach the master IP"
1434
        _ErrorIf(True, self.ENODENET, node, msg)
1435

    
1436

    
1437
  def _VerifyInstance(self, instance, instanceconfig, node_image):
1438
    """Verify an instance.
1439

1440
    This function checks to see if the required block devices are
1441
    available on the instance's node.
1442

1443
    """
1444
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1445
    node_current = instanceconfig.primary_node
1446

    
1447
    node_vol_should = {}
1448
    instanceconfig.MapLVsByNode(node_vol_should)
1449

    
1450
    for node in node_vol_should:
1451
      n_img = node_image[node]
1452
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1453
        # ignore missing volumes on offline or broken nodes
1454
        continue
1455
      for volume in node_vol_should[node]:
1456
        test = volume not in n_img.volumes
1457
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1458
                 "volume %s missing on node %s", volume, node)
1459

    
1460
    if instanceconfig.admin_up:
1461
      pri_img = node_image[node_current]
1462
      test = instance not in pri_img.instances and not pri_img.offline
1463
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1464
               "instance not running on its primary node %s",
1465
               node_current)
1466

    
1467
    for node, n_img in node_image.items():
1468
      if (not node == node_current):
1469
        test = instance in n_img.instances
1470
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1471
                 "instance should not run on node %s", node)
1472

    
1473
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1474
    """Verify if there are any unknown volumes in the cluster.
1475

1476
    The .os, .swap and backup volumes are ignored. All other volumes are
1477
    reported as unknown.
1478

1479
    """
1480
    for node, n_img in node_image.items():
1481
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1482
        # skip non-healthy nodes
1483
        continue
1484
      for volume in n_img.volumes:
1485
        test = (node not in node_vol_should or
1486
                volume not in node_vol_should[node])
1487
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1488
                      "volume %s is unknown", volume)
1489

    
1490
  def _VerifyOrphanInstances(self, instancelist, node_image):
1491
    """Verify the list of running instances.
1492

1493
    This checks what instances are running but unknown to the cluster.
1494

1495
    """
1496
    for node, n_img in node_image.items():
1497
      for o_inst in n_img.instances:
1498
        test = o_inst not in instancelist
1499
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1500
                      "instance %s on node %s should not exist", o_inst, node)
1501

    
1502
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1503
    """Verify N+1 Memory Resilience.
1504

1505
    Check that if one single node dies we can still start all the
1506
    instances it was primary for.
1507

1508
    """
1509
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must-have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)
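    # Illustrative shape only (names are made up): node_drbd now maps
    # each expected minor to (instance_name, should_be_active), e.g.
    #   {0: ("instance1.example.com", True), 1: ("ghost-inst", False)}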

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
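    # nimg.oslist now maps each OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions)
    # tuples; illustrative shape only (names and paths are made up):
    #   {"debian-image": [("/srv/ganeti/os/debian-image", True, "",
    #                      set(["default"]), set(), set([15]))]}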
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail,
    their output is logged in the verify output and the verification
    fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

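  # Exec below drives the whole verification: it builds the expected
  # cluster state from the configuration, issues a single node_verify
  # RPC to all nodes, and then runs the per-node, per-instance and
  # cluster-wide (orphan volumes/instances, N+1 memory) checks
  # implemented by the helpers above.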
  def Exec(self, feedback_fn):
1832
    """Verify integrity of cluster, performing various test on nodes.
1833

1834
    """
1835
    self.bad = False
1836
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1837
    verbose = self.op.verbose
1838
    self._feedback_fn = feedback_fn
1839
    feedback_fn("* Verifying global settings")
1840
    for msg in self.cfg.VerifyConfig():
1841
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1842

    
1843
    # Check the cluster certificates
1844
    for cert_filename in constants.ALL_CERT_FILES:
1845
      (errcode, msg) = _VerifyCertificate(cert_filename)
1846
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1847

    
1848
    vg_name = self.cfg.GetVGName()
1849
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1850
    cluster = self.cfg.GetClusterInfo()
1851
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1852
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1853
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1854
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1855
                        for iname in instancelist)
1856
    i_non_redundant = [] # Non redundant instances
1857
    i_non_a_balanced = [] # Non auto-balanced instances
1858
    n_offline = 0 # Count of offline nodes
1859
    n_drained = 0 # Count of nodes being drained
1860
    node_vol_should = {}
1861

    
1862
    # FIXME: verify OS list
1863
    # do local checksums
1864
    master_files = [constants.CLUSTER_CONF_FILE]
1865
    master_node = self.master_node = self.cfg.GetMasterNode()
1866
    master_ip = self.cfg.GetMasterIP()
1867

    
1868
    file_names = ssconf.SimpleStore().GetFileList()
1869
    file_names.extend(constants.ALL_CERT_FILES)
1870
    file_names.extend(master_files)
1871
    if cluster.modify_etc_hosts:
1872
      file_names.append(constants.ETC_HOSTS)
1873

    
1874
    local_checksums = utils.FingerprintFiles(file_names)
1875

    
1876
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1877
    node_verify_param = {
1878
      constants.NV_FILELIST: file_names,
1879
      constants.NV_NODELIST: [node.name for node in nodeinfo
1880
                              if not node.offline],
1881
      constants.NV_HYPERVISOR: hypervisors,
1882
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1883
                                  node.secondary_ip) for node in nodeinfo
1884
                                 if not node.offline],
1885
      constants.NV_INSTANCELIST: hypervisors,
1886
      constants.NV_VERSION: None,
1887
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1888
      constants.NV_NODESETUP: None,
1889
      constants.NV_TIME: None,
1890
      constants.NV_MASTERIP: (master_node, master_ip),
1891
      constants.NV_OSLIST: None,
1892
      }
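    # Each NV_* key above requests one class of checks from the node
    # daemon; the value carries the per-check arguments (file list,
    # nodes to ping, hypervisors to query, and so on).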
1893

    
1894
    if vg_name is not None:
1895
      node_verify_param[constants.NV_VGLIST] = None
1896
      node_verify_param[constants.NV_LVLIST] = vg_name
1897
      node_verify_param[constants.NV_PVLIST] = [vg_name]
1898
      node_verify_param[constants.NV_DRBDLIST] = None
1899

    
1900
    # Build our expected cluster state
1901
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
1902
                                                 name=node.name))
1903
                      for node in nodeinfo)
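    # node_image maps each node name to a NodeImage object which first
    # accumulates the expected state (pinst/sinst/sbp) and later the
    # runtime data returned by the node_verify RPC.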
1904

    
1905
    for instance in instancelist:
1906
      inst_config = instanceinfo[instance]
1907

    
1908
      for nname in inst_config.all_nodes:
1909
        if nname not in node_image:
1910
          # ghost node
1911
          gnode = self.NodeImage(name=nname)
1912
          gnode.ghost = True
1913
          node_image[nname] = gnode
1914

    
1915
      inst_config.MapLVsByNode(node_vol_should)
1916

    
1917
      pnode = inst_config.primary_node
1918
      node_image[pnode].pinst.append(instance)
1919

    
1920
      for snode in inst_config.secondary_nodes:
1921
        nimg = node_image[snode]
1922
        nimg.sinst.append(instance)
1923
        if pnode not in nimg.sbp:
1924
          nimg.sbp[pnode] = []
1925
        nimg.sbp[pnode].append(instance)
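        # nimg.sbp groups, for this secondary node, the instances by
        # their primary node; the N+1 memory check later uses it to see
        # whether this node could absorb a failover from each peer.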
1926

    
1927
    # At this point, we have the in-memory data structures complete,
1928
    # except for the runtime information, which we'll gather next
1929

    
1930
    # Due to the way our RPC system works, exact response times cannot be
1931
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1932
    # time before and after executing the request, we can at least have a time
1933
    # window.
1934
    nvinfo_starttime = time.time()
1935
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1936
                                           self.cfg.GetClusterName())
1937
    nvinfo_endtime = time.time()
1938

    
1939
    all_drbd_map = self.cfg.ComputeDRBDMap()
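    # all_drbd_map: node name -> {minor: instance name}, computed from
    # the configuration; it is checked against each node's actually
    # used minors in _VerifyNodeDrbd.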
1940

    
1941
    feedback_fn("* Verifying node status")
1942

    
1943
    refos_img = None
1944

    
1945
    for node_i in nodeinfo:
1946
      node = node_i.name
1947
      nimg = node_image[node]
1948

    
1949
      if node_i.offline:
1950
        if verbose:
1951
          feedback_fn("* Skipping offline node %s" % (node,))
1952
        n_offline += 1
1953
        continue
1954

    
1955
      if node == master_node:
1956
        ntype = "master"
1957
      elif node_i.master_candidate:
1958
        ntype = "master candidate"
1959
      elif node_i.drained:
1960
        ntype = "drained"
1961
        n_drained += 1
1962
      else:
1963
        ntype = "regular"
1964
      if verbose:
1965
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1966

    
1967
      msg = all_nvinfo[node].fail_msg
1968
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1969
      if msg:
1970
        nimg.rpc_fail = True
1971
        continue
1972

    
1973
      nresult = all_nvinfo[node].payload
1974

    
1975
      nimg.call_ok = self._VerifyNode(node_i, nresult)
1976
      self._VerifyNodeNetwork(node_i, nresult)
1977
      self._VerifyNodeLVM(node_i, nresult, vg_name)
1978
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1979
                            master_files)
1980
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1981
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1982

    
1983
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1984
      self._UpdateNodeInstances(node_i, nresult, nimg)
1985
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1986
      self._UpdateNodeOS(node_i, nresult, nimg)
1987
      if not nimg.os_fail:
1988
        if refos_img is None:
1989
          refos_img = nimg
1990
        self._VerifyNodeOS(node_i, nimg, refos_img)
1991

    
1992
    feedback_fn("* Verifying instance status")
1993
    for instance in instancelist:
1994
      if verbose:
1995
        feedback_fn("* Verifying instance %s" % instance)
1996
      inst_config = instanceinfo[instance]
1997
      self._VerifyInstance(instance, inst_config, node_image)
1998
      inst_nodes_offline = []
1999

    
2000
      pnode = inst_config.primary_node
2001
      pnode_img = node_image[pnode]
2002
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2003
               self.ENODERPC, pnode, "instance %s, connection to"
2004
               " primary node failed", instance)
2005

    
2006
      if pnode_img.offline:
2007
        inst_nodes_offline.append(pnode)
2008

    
2009
      # If the instance is non-redundant we cannot survive losing its primary
2010
      # node, so we are not N+1 compliant. On the other hand we have no disk
2011
      # templates with more than one secondary so that situation is not well
2012
      # supported either.
2013
      # FIXME: does not support file-backed instances
2014
      if not inst_config.secondary_nodes:
2015
        i_non_redundant.append(instance)
2016
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2017
               instance, "instance has multiple secondary nodes: %s",
2018
               utils.CommaJoin(inst_config.secondary_nodes),
2019
               code=self.ETYPE_WARNING)
2020

    
2021
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2022
        i_non_a_balanced.append(instance)
2023

    
2024
      for snode in inst_config.secondary_nodes:
2025
        s_img = node_image[snode]
2026
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2027
                 "instance %s, connection to secondary node failed", instance)
2028

    
2029
        if s_img.offline:
2030
          inst_nodes_offline.append(snode)
2031

    
2032
      # warn that the instance lives on offline nodes
2033
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2034
               "instance lives on offline node(s) %s",
2035
               utils.CommaJoin(inst_nodes_offline))
2036
      # ... or ghost nodes
2037
      for node in inst_config.all_nodes:
2038
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2039
                 "instance lives on ghost node %s", node)
2040

    
2041
    feedback_fn("* Verifying orphan volumes")
2042
    self._VerifyOrphanVolumes(node_vol_should, node_image)
2043

    
2044
    feedback_fn("* Verifying orphan instances")
2045
    self._VerifyOrphanInstances(instancelist, node_image)
2046

    
2047
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
2048
      feedback_fn("* Verifying N+1 Memory redundancy")
2049
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2050

    
2051
    feedback_fn("* Other Notes")
2052
    if i_non_redundant:
2053
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2054
                  % len(i_non_redundant))
2055

    
2056
    if i_non_a_balanced:
2057
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2058
                  % len(i_non_a_balanced))
2059

    
2060
    if n_offline:
2061
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2062

    
2063
    if n_drained:
2064
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2065

    
2066
    return not self.bad
2067

    
2068
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2069
    """Analyze the post-hooks' result
2070

2071
    This method analyses the hook result, handles it, and sends some
2072
    nicely-formatted feedback back to the user.
2073

2074
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2075
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2076
    @param hooks_results: the results of the multi-node hooks rpc call
2077
    @param feedback_fn: function used send feedback back to the caller
2078
    @param lu_result: previous Exec result
2079
    @return: the new Exec result, based on the previous result
2080
        and hook results
2081

2082
    """
2083
    # We only really run POST phase hooks, and are only interested in
2084
    # their results
2085
    if phase == constants.HOOKS_PHASE_POST:
2086
      # Used to change hooks' output to proper indentation
2087
      indent_re = re.compile('^', re.M)
2088
      feedback_fn("* Hooks Results")
2089
      assert hooks_results, "invalid result from hooks"
2090

    
2091
      for node_name in hooks_results:
2092
        res = hooks_results[node_name]
2093
        msg = res.fail_msg
2094
        test = msg and not res.offline
2095
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2096
                      "Communication failure in hooks execution: %s", msg)
2097
        if res.offline or msg:
2098
          # No need to investigate payload if node is offline or gave an error.
2099
          # override manually lu_result here as _ErrorIf only
2100
          # overrides self.bad
2101
          lu_result = 1
2102
          continue
2103
        for script, hkr, output in res.payload:
2104
          test = hkr == constants.HKR_FAIL
2105
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2106
                        "Script %s failed, output:", script)
2107
          if test:
2108
            output = indent_re.sub('      ', output)
2109
            feedback_fn("%s" % output)
2110
            lu_result = 0
2111

    
2112
      return lu_result


class LUVerifyDisks(NoHooksLU):
2116
  """Verifies the cluster disks status.
2117

2118
  """
2119
  _OP_REQP = []
2120
  REQ_BGL = False
2121

    
2122
  def ExpandNames(self):
2123
    self.needed_locks = {
2124
      locking.LEVEL_NODE: locking.ALL_SET,
2125
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2126
    }
2127
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
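    # acquire all locks in shared mode (1): this LU only reads state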
2128

    
2129
  def Exec(self, feedback_fn):
2130
    """Verify integrity of cluster disks.
2131

2132
    @rtype: tuple of three items
2133
    @return: a tuple of (dict of node-to-node_error, list of instances
2134
        which need activate-disks, dict of instance: (node, volume) for
2135
        missing volumes
2136

2137
    """
2138
    result = res_nodes, res_instances, res_missing = {}, [], {}
2139

    
2140
    vg_name = self.cfg.GetVGName()
2141
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2142
    instances = [self.cfg.GetInstanceInfo(name)
2143
                 for name in self.cfg.GetInstanceList()]
2144

    
2145
    nv_dict = {}
2146
    for inst in instances:
2147
      inst_lvs = {}
2148
      if (not inst.admin_up or
2149
          inst.disk_template not in constants.DTS_NET_MIRROR):
2150
        continue
2151
      inst.MapLVsByNode(inst_lvs)
2152
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2153
      for node, vol_list in inst_lvs.iteritems():
2154
        for vol in vol_list:
2155
          nv_dict[(node, vol)] = inst
2156

    
2157
    if not nv_dict:
2158
      return result
2159

    
2160
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2161

    
2162
    for node in nodes:
2163
      # node_volume
2164
      node_res = node_lvs[node]
2165
      if node_res.offline:
2166
        continue
2167
      msg = node_res.fail_msg
2168
      if msg:
2169
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2170
        res_nodes[node] = msg
2171
        continue
2172

    
2173
      lvs = node_res.payload
2174
      for lv_name, (_, _, lv_online) in lvs.items():
2175
        inst = nv_dict.pop((node, lv_name), None)
2176
        if (not lv_online and inst is not None
2177
            and inst.name not in res_instances):
2178
          res_instances.append(inst.name)
2179

    
2180
    # any leftover items in nv_dict are missing LVs, let's arrange the
2181
    # data better
2182
    for key, inst in nv_dict.iteritems():
2183
      if inst.name not in res_missing:
2184
        res_missing[inst.name] = []
2185
      res_missing[inst.name].append(key)
2186

    
2187
    return result


class LURepairDiskSizes(NoHooksLU):
2191
  """Verifies the cluster disks sizes.
2192

2193
  """
2194
  _OP_REQP = ["instances"]
2195
  REQ_BGL = False
2196

    
2197
  def CheckArguments(self):
2198
    if not isinstance(self.op.instances, list):
2199
      raise errors.OpPrereqError("Invalid argument type 'instances'",
2200
                                 errors.ECODE_INVAL)
2201

    
2202
  def ExpandNames(self):
2203
    if self.op.instances:
2204
      self.wanted_names = []
2205
      for name in self.op.instances:
2206
        full_name = _ExpandInstanceName(self.cfg, name)
2207
        self.wanted_names.append(full_name)
2208
      self.needed_locks = {
2209
        locking.LEVEL_NODE: [],
2210
        locking.LEVEL_INSTANCE: self.wanted_names,
2211
        }
2212
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2213
    else:
2214
      self.wanted_names = None
2215
      self.needed_locks = {
2216
        locking.LEVEL_NODE: locking.ALL_SET,
2217
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2218
        }
2219
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2220

    
2221
  def DeclareLocks(self, level):
2222
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2223
      self._LockInstancesNodes(primary_only=True)
2224

    
2225
  def CheckPrereq(self):
2226
    """Check prerequisites.
2227

2228
    This only checks the optional instance list against the existing names.
2229

2230
    """
2231
    if self.wanted_names is None:
2232
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2233

    
2234
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2235
                             in self.wanted_names]
2236

    
2237
  def _EnsureChildSizes(self, disk):
2238
    """Ensure children of the disk have the needed disk size.
2239

2240
    This is valid mainly for DRBD8 and fixes an issue where the
2241
    children have smaller disk size.
2242

2243
    @param disk: an L{ganeti.objects.Disk} object
2244

2245
    """
2246
    if disk.dev_type == constants.LD_DRBD8:
2247
      assert disk.children, "Empty children for DRBD8?"
2248
      fchild = disk.children[0]
2249
      mismatch = fchild.size < disk.size
2250
      if mismatch:
2251
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2252
                     fchild.size, disk.size)
2253
        fchild.size = disk.size
2254

    
2255
      # and we recurse on this child only, not on the metadev
2256
      return self._EnsureChildSizes(fchild) or mismatch
2257
    else:
2258
      return False
2259

    
2260
  def Exec(self, feedback_fn):
2261
    """Verify the size of cluster disks.
2262

2263
    """
2264
    # TODO: check child disks too
2265
    # TODO: check differences in size between primary/secondary nodes
2266
    per_node_disks = {}
2267
    for instance in self.wanted_instances:
2268
      pnode = instance.primary_node
2269
      if pnode not in per_node_disks:
2270
        per_node_disks[pnode] = []
2271
      for idx, disk in enumerate(instance.disks):
2272
        per_node_disks[pnode].append((instance, idx, disk))
2273

    
2274
    changed = []
2275
    for node, dskl in per_node_disks.items():
2276
      newl = [v[2].Copy() for v in dskl]
2277
      for dsk in newl:
2278
        self.cfg.SetDiskID(dsk, node)
2279
      result = self.rpc.call_blockdev_getsizes(node, newl)
2280
      if result.fail_msg:
2281
        self.LogWarning("Failure in blockdev_getsizes call to node"
2282
                        " %s, ignoring", node)
2283
        continue
2284
      if len(result.data) != len(dskl):
2285
        self.LogWarning("Invalid result from node %s, ignoring node results",
2286
                        node)
2287
        continue
2288
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2289
        if size is None:
2290
          self.LogWarning("Disk %d of instance %s did not return size"
2291
                          " information, ignoring", idx, instance.name)
2292
          continue
2293
        if not isinstance(size, (int, long)):
2294
          self.LogWarning("Disk %d of instance %s did not return valid"
2295
                          " size information, ignoring", idx, instance.name)
2296
          continue
2297
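        # the RPC reports sizes in bytes while disk.size is kept in MiB,
        # hence the shift by 20 bits below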
        size = size >> 20
2298
        if size != disk.size:
2299
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2300
                       " correcting: recorded %d, actual %d", idx,
2301
                       instance.name, disk.size, size)
2302
          disk.size = size
2303
          self.cfg.Update(instance, feedback_fn)
2304
          changed.append((instance.name, idx, size))
2305
        if self._EnsureChildSizes(disk):
2306
          self.cfg.Update(instance, feedback_fn)
2307
          changed.append((instance.name, idx, disk.size))
2308
    return changed


class LURenameCluster(LogicalUnit):
2312
  """Rename the cluster.
2313

2314
  """
2315
  HPATH = "cluster-rename"
2316
  HTYPE = constants.HTYPE_CLUSTER
2317
  _OP_REQP = ["name"]
2318

    
2319
  def BuildHooksEnv(self):
2320
    """Build hooks env.
2321

2322
    """
2323
    env = {
2324
      "OP_TARGET": self.cfg.GetClusterName(),
2325
      "NEW_NAME": self.op.name,
2326
      }
2327
    mn = self.cfg.GetMasterNode()
2328
    all_nodes = self.cfg.GetNodeList()
2329
    return env, [mn], all_nodes
2330

    
2331
  def CheckPrereq(self):
2332
    """Verify that the passed name is a valid one.
2333

2334
    """
2335
    hostname = utils.GetHostInfo(self.op.name)
2336

    
2337
    new_name = hostname.name
2338
    self.ip = new_ip = hostname.ip
2339
    old_name = self.cfg.GetClusterName()
2340
    old_ip = self.cfg.GetMasterIP()
2341
    if new_name == old_name and new_ip == old_ip:
2342
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2343
                                 " cluster has changed",
2344
                                 errors.ECODE_INVAL)
2345
    if new_ip != old_ip:
2346
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2347
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2348
                                   " reachable on the network. Aborting." %
2349
                                   new_ip, errors.ECODE_NOTUNIQUE)
2350

    
2351
    self.op.name = new_name
2352

    
2353
  def Exec(self, feedback_fn):
2354
    """Rename the cluster.
2355

2356
    """
2357
    clustername = self.op.name
2358
    ip = self.ip
2359

    
2360
    # shutdown the master IP
2361
    master = self.cfg.GetMasterNode()
2362
    result = self.rpc.call_node_stop_master(master, False)
2363
    result.Raise("Could not disable the master role")
2364

    
2365
    try:
2366
      cluster = self.cfg.GetClusterInfo()
2367
      cluster.cluster_name = clustername
2368
      cluster.master_ip = ip
2369
      self.cfg.Update(cluster, feedback_fn)
2370

    
2371
      # update the known hosts file
2372
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2373
      node_list = self.cfg.GetNodeList()
2374
      try:
2375
        node_list.remove(master)
2376
      except ValueError:
2377
        pass
2378
      result = self.rpc.call_upload_file(node_list,
2379
                                         constants.SSH_KNOWN_HOSTS_FILE)
2380
      for to_node, to_result in result.iteritems():
2381
        msg = to_result.fail_msg
2382
        if msg:
2383
          msg = ("Copy of file %s to node %s failed: %s" %
2384
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2385
          self.proc.LogWarning(msg)
2386

    
2387
    finally:
2388
      result = self.rpc.call_node_start_master(master, False, False)
2389
      msg = result.fail_msg
2390
      if msg:
2391
        self.LogWarning("Could not re-enable the master role on"
2392
                        " the master, please restart manually: %s", msg)
2393

    
2394

    
2395
def _RecursiveCheckIfLVMBased(disk):
2396
  """Check if the given disk or its children are lvm-based.
2397

2398
  @type disk: L{objects.Disk}
2399
  @param disk: the disk to check
2400
  @rtype: boolean
2401
  @return: boolean indicating whether a LD_LV dev_type was found or not
2402

2403
  """
2404
  if disk.children:
2405
    for chdisk in disk.children:
2406
      if _RecursiveCheckIfLVMBased(chdisk):
2407
        return True
2408
  return disk.dev_type == constants.LD_LV


class LUSetClusterParams(LogicalUnit):
2412
  """Change the parameters of the cluster.
2413

2414
  """
2415
  HPATH = "cluster-modify"
2416
  HTYPE = constants.HTYPE_CLUSTER
2417
  _OP_REQP = []
2418
  _OP_DEFS = [
2419
    ("candidate_pool_size", None),
2420
    ("uid_pool", None),
2421
    ("add_uids", None),
2422
    ("remove_uids", None),
2423
    ]
2424
  REQ_BGL = False
2425

    
2426
  def CheckArguments(self):
2427
    """Check parameters
2428

2429
    """
2430
    if self.op.candidate_pool_size is not None:
2431
      try:
2432
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2433
      except (ValueError, TypeError), err:
2434
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2435
                                   str(err), errors.ECODE_INVAL)
2436
      if self.op.candidate_pool_size < 1:
2437
        raise errors.OpPrereqError("At least one master candidate needed",
2438
                                   errors.ECODE_INVAL)
2439

    
2440
    _CheckBooleanOpField(self.op, "maintain_node_health")
2441

    
2442
    if self.op.uid_pool:
2443
      uidpool.CheckUidPool(self.op.uid_pool)
2444

    
2445
    if self.op.add_uids:
2446
      uidpool.CheckUidPool(self.op.add_uids)
2447

    
2448
    if self.op.remove_uids:
2449
      uidpool.CheckUidPool(self.op.remove_uids)
2450

    
2451
  def ExpandNames(self):
2452
    # FIXME: in the future maybe other cluster params won't require checking on
2453
    # all nodes to be modified.
2454
    self.needed_locks = {
2455
      locking.LEVEL_NODE: locking.ALL_SET,
2456
    }
2457
    self.share_locks[locking.LEVEL_NODE] = 1
2458

    
2459
  def BuildHooksEnv(self):
2460
    """Build hooks env.
2461

2462
    """
2463
    env = {
2464
      "OP_TARGET": self.cfg.GetClusterName(),
2465
      "NEW_VG_NAME": self.op.vg_name,
2466
      }
2467
    mn = self.cfg.GetMasterNode()
2468
    return env, [mn], [mn]
2469

    
2470
  def CheckPrereq(self):
2471
    """Check prerequisites.
2472

2473
    This checks whether the given params don't conflict and
2474
    if the given volume group is valid.
2475

2476
    """
2477
    if self.op.vg_name is not None and not self.op.vg_name:
2478
      instances = self.cfg.GetAllInstancesInfo().values()
2479
      for inst in instances:
2480
        for disk in inst.disks:
2481
          if _RecursiveCheckIfLVMBased(disk):
2482
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2483
                                       " lvm-based instances exist",
2484
                                       errors.ECODE_INVAL)
2485

    
2486
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2487

    
2488
    # if vg_name not None, checks given volume group on all nodes
2489
    if self.op.vg_name:
2490
      vglist = self.rpc.call_vg_list(node_list)
2491
      for node in node_list:
2492
        msg = vglist[node].fail_msg
2493
        if msg:
2494
          # ignoring down node
2495
          self.LogWarning("Error while gathering data on node %s"
2496
                          " (ignoring node): %s", node, msg)
2497
          continue
2498
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2499
                                              self.op.vg_name,
2500
                                              constants.MIN_VG_SIZE)
2501
        if vgstatus:
2502
          raise errors.OpPrereqError("Error on node '%s': %s" %
2503
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2504

    
2505
    self.cluster = cluster = self.cfg.GetClusterInfo()
2506
    # validate params changes
2507
    if self.op.beparams:
2508
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2509
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2510

    
2511
    if self.op.nicparams:
2512
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2513
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2514
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2515
      nic_errors = []
2516

    
2517
      # check all instances for consistency
2518
      for instance in self.cfg.GetAllInstancesInfo().values():
2519
        for nic_idx, nic in enumerate(instance.nics):
2520
          params_copy = copy.deepcopy(nic.nicparams)
2521
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2522

    
2523
          # check parameter syntax
2524
          try:
2525
            objects.NIC.CheckParameterSyntax(params_filled)
2526
          except errors.ConfigurationError, err:
2527
            nic_errors.append("Instance %s, nic/%d: %s" %
2528
                              (instance.name, nic_idx, err))
2529

    
2530
          # if we're moving instances to routed, check that they have an ip
2531
          target_mode = params_filled[constants.NIC_MODE]
2532
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2533
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2534
                              (instance.name, nic_idx))
2535
      if nic_errors:
2536
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2537
                                   "\n".join(nic_errors))
2538

    
2539
    # hypervisor list/parameters
2540
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2541
    if self.op.hvparams:
2542
      if not isinstance(self.op.hvparams, dict):
2543
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2544
                                   errors.ECODE_INVAL)
2545
      for hv_name, hv_dict in self.op.hvparams.items():
2546
        if hv_name not in self.new_hvparams:
2547
          self.new_hvparams[hv_name] = hv_dict
2548
        else:
2549
          self.new_hvparams[hv_name].update(hv_dict)
2550

    
2551
    # os hypervisor parameters
2552
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2553
    if self.op.os_hvp:
2554
      if not isinstance(self.op.os_hvp, dict):
2555
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2556
                                   errors.ECODE_INVAL)
2557
      for os_name, hvs in self.op.os_hvp.items():
2558
        if not isinstance(hvs, dict):
2559
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2560
                                      " input"), errors.ECODE_INVAL)
2561
        if os_name not in self.new_os_hvp:
2562
          self.new_os_hvp[os_name] = hvs
2563
        else:
2564
          for hv_name, hv_dict in hvs.items():
2565
            if hv_name not in self.new_os_hvp[os_name]:
2566
              self.new_os_hvp[os_name][hv_name] = hv_dict
2567
            else:
2568
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2569

    
2570
    # os parameters
2571
    self.new_osp = objects.FillDict(cluster.osparams, {})
2572
    if self.op.osparams:
2573
      if not isinstance(self.op.osparams, dict):
2574
        raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
2575
                                   errors.ECODE_INVAL)
2576
      for os_name, osp in self.op.osparams.items():
2577
        if not isinstance(osp, dict):
2578
          raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
2579
                                      " input"), errors.ECODE_INVAL)
2580
        if os_name not in self.new_osp:
2581
          self.new_osp[os_name] = {}
2582

    
2583
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2584
                                                  use_none=True)
2585

    
2586
        if not self.new_osp[os_name]:
2587
          # we removed all parameters
2588
          del self.new_osp[os_name]
2589
        else:
2590
          # check the parameter validity (remote check)
2591
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2592
                         os_name, self.new_osp[os_name])
2593

    
2594
    # changes to the hypervisor list
2595
    if self.op.enabled_hypervisors is not None:
2596
      self.hv_list = self.op.enabled_hypervisors
2597
      if not self.hv_list:
2598
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2599
                                   " least one member",
2600
                                   errors.ECODE_INVAL)
2601
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2602
      if invalid_hvs:
2603
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2604
                                   " entries: %s" %
2605
                                   utils.CommaJoin(invalid_hvs),
2606
                                   errors.ECODE_INVAL)
2607
      for hv in self.hv_list:
2608
        # if the hypervisor doesn't already exist in the cluster
2609
        # hvparams, we initialize it to empty, and then (in both
2610
        # cases) we make sure to fill the defaults, as we might not
2611
        # have a complete defaults list if the hypervisor wasn't
2612
        # enabled before
2613
        if hv not in new_hvp:
2614
          new_hvp[hv] = {}
2615
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2616
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2617
    else:
2618
      self.hv_list = cluster.enabled_hypervisors
2619

    
2620
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2621
      # either the enabled list has changed, or the parameters have, validate
2622
      for hv_name, hv_params in self.new_hvparams.items():
2623
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2624
            (self.op.enabled_hypervisors and
2625
             hv_name in self.op.enabled_hypervisors)):
2626
          # either this is a new hypervisor, or its parameters have changed
2627
          hv_class = hypervisor.GetHypervisor(hv_name)
2628
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2629
          hv_class.CheckParameterSyntax(hv_params)
2630
          _CheckHVParams(self, node_list, hv_name, hv_params)
2631

    
2632
    if self.op.os_hvp:
2633
      # no need to check any newly-enabled hypervisors, since the
2634
      # defaults have already been checked in the above code-block
2635
      for os_name, os_hvp in self.new_os_hvp.items():
2636
        for hv_name, hv_params in os_hvp.items():
2637
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2638
          # we need to fill in the new os_hvp on top of the actual hv_p
2639
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2640
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2641
          hv_class = hypervisor.GetHypervisor(hv_name)
2642
          hv_class.CheckParameterSyntax(new_osp)
2643
          _CheckHVParams(self, node_list, hv_name, new_osp)
2644

    
2645

    
2646
  def Exec(self, feedback_fn):
2647
    """Change the parameters of the cluster.
2648

2649
    """
2650
    if self.op.vg_name is not None:
2651
      new_volume = self.op.vg_name
2652
      if not new_volume:
2653
        new_volume = None
2654
      if new_volume != self.cfg.GetVGName():
2655
        self.cfg.SetVGName(new_volume)
2656
      else:
2657
        feedback_fn("Cluster LVM configuration already in desired"
2658
                    " state, not changing")
2659
    if self.op.hvparams:
2660
      self.cluster.hvparams = self.new_hvparams
2661
    if self.op.os_hvp:
2662
      self.cluster.os_hvp = self.new_os_hvp
2663
    if self.op.enabled_hypervisors is not None:
2664
      self.cluster.hvparams = self.new_hvparams
2665
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2666
    if self.op.beparams:
2667
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2668
    if self.op.nicparams:
2669
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2670
    if self.op.osparams:
2671
      self.cluster.osparams = self.new_osp
2672

    
2673
    if self.op.candidate_pool_size is not None:
2674
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2675
      # we need to update the pool size here, otherwise the save will fail
2676
      _AdjustCandidatePool(self, [])
2677

    
2678
    if self.op.maintain_node_health is not None:
2679
      self.cluster.maintain_node_health = self.op.maintain_node_health
2680

    
2681
    if self.op.add_uids is not None:
2682
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2683

    
2684
    if self.op.remove_uids is not None:
2685
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2686

    
2687
    if self.op.uid_pool is not None:
2688
      self.cluster.uid_pool = self.op.uid_pool
2689

    
2690
    self.cfg.Update(self.cluster, feedback_fn)


def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2694
  """Distribute additional files which are part of the cluster configuration.
2695

2696
  ConfigWriter takes care of distributing the config and ssconf files, but
2697
  there are more files which should be distributed to all nodes. This function
2698
  makes sure those are copied.
2699

2700
  @param lu: calling logical unit
2701
  @param additional_nodes: list of nodes not in the config to distribute to
2702

2703
  """
2704
  # 1. Gather target nodes
2705
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2706
  dist_nodes = lu.cfg.GetOnlineNodeList()
2707
  if additional_nodes is not None:
2708
    dist_nodes.extend(additional_nodes)
2709
  if myself.name in dist_nodes:
2710
    dist_nodes.remove(myself.name)
2711

    
2712
  # 2. Gather files to distribute
2713
  dist_files = set([constants.ETC_HOSTS,
2714
                    constants.SSH_KNOWN_HOSTS_FILE,
2715
                    constants.RAPI_CERT_FILE,
2716
                    constants.RAPI_USERS_FILE,
2717
                    constants.CONFD_HMAC_KEY,
2718
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
2719
                   ])
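  # only files which actually exist on the master are distributed (see
  # the os.path.exists() check below)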
2720

    
2721
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2722
  for hv_name in enabled_hypervisors:
2723
    hv_class = hypervisor.GetHypervisor(hv_name)
2724
    dist_files.update(hv_class.GetAncillaryFiles())
2725

    
2726
  # 3. Perform the files upload
2727
  for fname in dist_files:
2728
    if os.path.exists(fname):
2729
      result = lu.rpc.call_upload_file(dist_nodes, fname)
2730
      for to_node, to_result in result.items():
2731
        msg = to_result.fail_msg
2732
        if msg:
2733
          msg = ("Copy of file %s to node %s failed: %s" %
2734
                 (fname, to_node, msg))
2735
          lu.proc.LogWarning(msg)
2736

    
2737

    
2738
class LURedistributeConfig(NoHooksLU):
2739
  """Force the redistribution of cluster configuration.
2740

2741
  This is a very simple LU.
2742

2743
  """
2744
  _OP_REQP = []
2745
  REQ_BGL = False
2746

    
2747
  def ExpandNames(self):
2748
    self.needed_locks = {
2749
      locking.LEVEL_NODE: locking.ALL_SET,
2750
    }
2751
    self.share_locks[locking.LEVEL_NODE] = 1
2752

    
2753
  def Exec(self, feedback_fn):
2754
    """Redistribute the configuration.
2755

2756
    """
2757
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2758
    _RedistributeAncillaryFiles(self)
2759

    
2760

    
2761
def _WaitForSync(lu, instance, disks=None, oneshot=False):
2762
  """Sleep and poll for an instance's disk to sync.
2763

2764
  """
2765
  if not instance.disks or (disks is not None and not disks):
2766
    return True
2767

    
2768
  disks = _ExpandCheckDisks(instance, disks)
2769

    
2770
  if not oneshot:
2771
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2772

    
2773
  node = instance.primary_node
2774

    
2775
  for dev in disks:
2776
    lu.cfg.SetDiskID(dev, node)
2777

    
2778
  # TODO: Convert to utils.Retry
2779

    
2780
  retries = 0
2781
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2782
  while True:
2783
    max_time = 0
2784
    done = True
2785
    cumul_degraded = False
2786
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2787
    msg = rstats.fail_msg
2788
    if msg:
2789
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2790
      retries += 1
2791
      if retries >= 10:
2792
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2793
                                 " aborting." % node)
2794
      time.sleep(6)
2795
      continue
2796
    rstats = rstats.payload
2797
    retries = 0
2798
    for i, mstat in enumerate(rstats):
2799
      if mstat is None:
2800
        lu.LogWarning("Can't compute data for node %s/%s",
2801
                           node, disks[i].iv_name)
2802
        continue
2803

    
2804
      cumul_degraded = (cumul_degraded or
2805
                        (mstat.is_degraded and mstat.sync_percent is None))
2806
      if mstat.sync_percent is not None:
2807
        done = False
2808
        if mstat.estimated_time is not None:
2809
          rem_time = ("%s remaining (estimated)" %
2810
                      utils.FormatSeconds(mstat.estimated_time))
2811
          max_time = mstat.estimated_time
2812
        else:
2813
          rem_time = "no time estimate"
2814
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2815
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
2816

    
2817
    # if we're done but degraded, let's do a few small retries, to
2818
    # make sure we see a stable and not transient situation; therefore
2819
    # we force restart of the loop
2820
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2821
      logging.info("Degraded disks found, %d retries left", degr_retries)
2822
      degr_retries -= 1
2823
      time.sleep(1)
2824
      continue
2825

    
2826
    if done or oneshot:
2827
      break
2828

    
2829
    time.sleep(min(60, max_time))
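    # poll at most once a minute, or sooner if the estimated remaining
    # sync time is shorter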
2830

    
2831
  if done:
2832
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2833
  return not cumul_degraded
2834

    
2835

    
2836
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2837
  """Check that mirrors are not degraded.
2838

2839
  The ldisk parameter, if True, will change the test from the
2840
  is_degraded attribute (which represents overall non-ok status for
2841
  the device(s)) to the ldisk (representing the local storage status).
2842

2843
  """
2844
  lu.cfg.SetDiskID(dev, node)
2845

    
2846
  result = True
2847

    
2848
  if on_primary or dev.AssembleOnSecondary():
2849
    rstats = lu.rpc.call_blockdev_find(node, dev)
2850
    msg = rstats.fail_msg
2851
    if msg:
2852
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2853
      result = False
2854
    elif not rstats.payload:
2855
      lu.LogWarning("Can't find disk on node %s", node)
2856
      result = False
2857
    else:
2858
      if ldisk:
2859
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2860
      else:
2861
        result = result and not rstats.payload.is_degraded
2862

    
2863
  if dev.children:
2864
    for child in dev.children:
2865
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2866

    
2867
  return result
2868

    
2869

    
2870
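# --- Illustrative sketch (not part of Ganeti) --------------------------------
# _CheckDiskConsistency above combines two ideas: the predicate changes with
# the 'ldisk' flag (local-storage status instead of overall degradation), and
# the check recurses over dev.children so composite devices only count as ok
# when every layer is ok.  The toy below reproduces just that shape with plain
# dictionaries instead of objects.Disk and RPC results; the dict keys used here
# ("degraded", "ldisk_ok", "children") are invented for the example.
def _ExampleTreeConsistent(dev, ldisk=False):
  """Return True iff 'dev' and all its children pass the chosen test.

  """
  if ldisk:
    ok = dev.get("ldisk_ok", False)
  else:
    ok = not dev.get("degraded", True)
  for child in dev.get("children", []):
    ok = ok and _ExampleTreeConsistent(child, ldisk=ldisk)
  return ok

# Example: a DRBD-like device on top of two clean LVs, itself still syncing.
_EXAMPLE_DISK = {
  "degraded": True, "ldisk_ok": True,
  "children": [{"degraded": False, "ldisk_ok": True},
               {"degraded": False, "ldisk_ok": True}],
  }
# _ExampleTreeConsistent(_EXAMPLE_DISK) == False (overall sync not finished)
# _ExampleTreeConsistent(_EXAMPLE_DISK, ldisk=True) == True (local data is ok)
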
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
                                   "parameters", "api_versions")

  def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
      for idx, osl in enumerate(os_data.values()):
        valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
        node_variants, node_params, node_api = osl[0][3:6]
        if idx == 0: # first entry
          variants = set(node_variants)
          params = set(node_params)
          api_versions = set(node_api)
        else: # keep consistency
          variants.intersection_update(node_variants)
          params.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output

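# --- Illustrative sketch (not part of Ganeti) --------------------------------
# Two steps of LUDiagnoseOS above are easy to misread: _DiagnoseByOS inverts a
# per-node listing into a per-OS, per-node map, and Exec then intersects the
# variant sets so that only variants supported on every node survive.  The toy
# below shows both transformations on plain data (no RPC result objects); the
# input layout {node: [(os_name, variants), ...]} is simplified for the
# example and _ExampleRemapAndIntersect is a hypothetical name.
def _ExampleRemapAndIntersect(per_node):
  """Invert a per-node OS listing and compute the common variants per OS.

  @param per_node: dict mapping node name to a list of (os_name, variants)
  @return: dict mapping os_name to the set of variants common to all nodes
      that reported it

  """
  per_os = {}
  for node_name, os_list in per_node.items():
    for (os_name, variants) in os_list:
      per_os.setdefault(os_name, {})[node_name] = set(variants)

  common = {}
  for os_name, node_map in per_os.items():
    result = None
    for variants in node_map.values():
      if result is None:      # first node seen for this OS
        result = set(variants)
      else:                   # keep consistency: intersect with later nodes
        result.intersection_update(variants)
    common[os_name] = result or set()
  return common

# Example:
#   _ExampleRemapAndIntersect({
#     "node1": [("debian-etch", ["default", "minimal"])],
#     "node2": [("debian-etch", ["default"])],
#   }) == {"debian-etch": set(["default"])}
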
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)

class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output

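# --- Illustrative sketch (not part of Ganeti) --------------------------------
# The "data gathering" part of LUQueryNodes.Exec above builds two reverse maps
# (node -> primary instances, node -> secondary instances) so that per-node
# counts and lists can be answered without rescanning all instances for every
# output row.  The toy below shows that inversion with plain tuples; the input
# shape (name, primary, secondaries) and the helper name are invented for the
# example.
def _ExampleNodeInstanceMaps(nodes, instances):
  """Build node->primary and node->secondary instance-name maps.

  @param nodes: iterable of node names
  @param instances: iterable of (inst_name, primary_node, secondary_nodes)

  """
  node_to_primary = dict((name, set()) for name in nodes)
  node_to_secondary = dict((name, set()) for name in nodes)
  for (inst_name, primary, secondaries) in instances:
    if primary in node_to_primary:
      node_to_primary[primary].add(inst_name)
    for secnode in secondaries:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst_name)
  return node_to_primary, node_to_secondary

# Example: _ExampleNodeInstanceMaps(["n1", "n2"], [("web", "n1", ["n2"])])
# gives ({"n1": set(["web"]), "n2": set()},
#        {"n1": set(), "n2": set(["web"])})
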
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output

class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  _OP_DEFS = [("name", None)]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result

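# --- Illustrative sketch (not part of Ganeti) --------------------------------
# LUQueryNodeStorage.Exec above deals with positional rows: the RPC returns one
# list per storage unit, in the same order as the requested fields, so the LU
# always adds the name field, builds a field->index map and keys the rows by
# the name column before emitting output.  The toy below shows just that
# bookkeeping with plain lists; the field names and helper name used here are
# placeholders, not the real constants.VALID_STORAGE_FIELDS values.
def _ExampleIndexRows(requested_fields, rows, name_field="name"):
  """Key positional rows by their name column.

  @param requested_fields: field names, in the same order as the row values
  @param rows: list of lists, one value per requested field
  @return: (field_idx, rows_by_name)

  """
  field_idx = dict((fname, idx)
                   for (idx, fname) in enumerate(requested_fields))
  name_idx = field_idx[name_field]
  rows_by_name = dict((row[name_idx], row) for row in rows)
  return field_idx, rows_by_name

# Example:
#   fidx, byname = _ExampleIndexRows(["name", "size"],
#                                    [["xenvg", 102400], ["data", 51200]])
#   byname["data"][fidx["size"]] == 51200
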
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Modifies the storage unit.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  _OP_DEFS = [("secondary_ip", None)]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      self.op.secondary_ip = primary_ip
    if not utils.IsValidIP(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())

class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote


  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested flag changes against the node's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)


    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result

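# --- Illustrative sketch (not part of Ganeti) --------------------------------
# LUSetNodeParams.CheckArguments above treats each flag as a tri-state value:
# None means "leave alone", True/False request a change, and at most one of
# offline/drained/master_candidate may be switched on in the same operation.
# The toy below captures just that validation rule; the function name and the
# ValueError messages are invented for the example.
def _ExampleValidateNodeMods(offline, drained, master_candidate):
  """Validate a tri-state (None/True/False) node modification request.

  """
  all_mods = [offline, master_candidate, drained]
  if all_mods.count(None) == len(all_mods):
    raise ValueError("at least one modification must be given")
  if all_mods.count(True) > 1:
    raise ValueError("a node cannot be put into more than one state at once")
  return True

# Example: _ExampleValidateNodeMods(True, None, None) passes, while
# _ExampleValidateNodeMods(True, True, None) raises ValueError.
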
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload

class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result

class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values

class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  _OP_DEFS = [("ignore_size", False)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info

def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info

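# --- Illustrative sketch (not part of Ganeti) --------------------------------
# The two-pass scheme in _AssembleInstanceDisks above exists to shrink (not
# close) the race window: every node first brings the device up in secondary
# mode, and only afterwards is the primary node switched to primary, so the
# DRBD peers have had a chance to connect.  The toy below shows just the
# ordering with a generic per-node activation callback; '_ExampleTwoPass' and
# 'assemble_fn' and its arguments are invented for the example.
def _ExampleTwoPassAssemble(nodes, primary, assemble_fn):
  """Activate a device on all nodes, promoting the primary only at the end.

  @param nodes: all node names holding a copy of the device
  @param primary: the node that must end up in primary mode
  @param assemble_fn: callable (node, as_primary) -> bool (success)
  @return: True if every call succeeded

  """
  ok = True
  # 1st pass: everything in secondary mode, including the future primary
  for node in nodes:
    ok = assemble_fn(node, False) and ok
  # 2nd pass: only now promote the primary
  ok = assemble_fn(primary, True) and ok
  return ok
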
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")

class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)

def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)

def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks

def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  (they only generate a warning and do not affect the result).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result

def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if any node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


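# Example usage sketch for _CheckNodesFreeDisk (illustrative only): a LU that
# is about to create disks would check every involved node in one call; here
# "nodenames" and "required_mib" stand for the caller's node list and the
# total disk size in MiB it computed:
#
#   _CheckNodesFreeDisk(self, nodenames, required_mib)

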
class LUStartupInstance(LogicalUnit):
4279
  """Starts an instance.
4280

4281
  """
4282
  HPATH = "instance-start"
4283
  HTYPE = constants.HTYPE_INSTANCE
4284
  _OP_REQP = ["instance_name", "force"]
4285
  _OP_DEFS = [
4286
    ("beparams", _EmptyDict),
4287
    ("hvparams", _EmptyDict),
4288
    ]
4289
  REQ_BGL = False
4290

    
4291
  def ExpandNames(self):
4292
    self._ExpandAndLockInstance()
4293

    
4294
  def BuildHooksEnv(self):
4295
    """Build hooks env.
4296

4297
    This runs on master, primary and secondary nodes of the instance.
4298

4299
    """
4300
    env = {
4301
      "FORCE": self.op.force,
4302
      }
4303
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4304
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4305
    return env, nl, nl
4306

    
4307
  def CheckPrereq(self):
4308
    """Check prerequisites.
4309

4310
    This checks that the instance is in the cluster.
4311

4312
    """
4313
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4314
    assert self.instance is not None, \
4315
      "Cannot retrieve locked instance %s" % self.op.instance_name
4316

    
4317
    # extra beparams
4318
    if self.op.beparams:
4319
      if not isinstance(self.op.beparams, dict):
4320
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4321
                                   " dict" % (type(self.op.beparams), ),
4322
                                   errors.ECODE_INVAL)
4323
      # fill the beparams dict
4324
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4325

    
4326
    # extra hvparams
4327
    if self.op.hvparams:
4328
      if not isinstance(self.op.hvparams, dict):
4329
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4330
                                   " dict" % (type(self.op.hvparams), ),
4331
                                   errors.ECODE_INVAL)
4332

    
4333
      # check hypervisor parameter syntax (locally)
4334
      cluster = self.cfg.GetClusterInfo()
4335
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4336
      filled_hvp = cluster.FillHV(instance)
4337
      filled_hvp.update(self.op.hvparams)
4338
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4339
      hv_type.CheckParameterSyntax(filled_hvp)
4340
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4341

    
4342
    _CheckNodeOnline(self, instance.primary_node)
4343

    
4344
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4345
    # check bridges existence
4346
    _CheckInstanceBridgesExist(self, instance)
4347

    
4348
    remote_info = self.rpc.call_instance_info(instance.primary_node,
4349
                                              instance.name,
4350
                                              instance.hypervisor)
4351
    remote_info.Raise("Error checking node %s" % instance.primary_node,
4352
                      prereq=True, ecode=errors.ECODE_ENVIRON)
4353
    if not remote_info.payload: # not running already
4354
      _CheckNodeFreeMemory(self, instance.primary_node,
4355
                           "starting instance %s" % instance.name,
4356
                           bep[constants.BE_MEMORY], instance.hypervisor)
4357

    
4358
  def Exec(self, feedback_fn):
4359
    """Start the instance.
4360

4361
    """
4362
    instance = self.instance
4363
    force = self.op.force
4364

    
4365
    self.cfg.MarkInstanceUp(instance.name)
4366

    
4367
    node_current = instance.primary_node
4368

    
4369
    _StartInstanceDisks(self, instance, force)
4370

    
4371
    result = self.rpc.call_instance_start(node_current, instance,
4372
                                          self.op.hvparams, self.op.beparams)
4373
    msg = result.fail_msg
4374
    if msg:
4375
      _ShutdownInstanceDisks(self, instance)
4376
      raise errors.OpExecError("Could not start instance: %s" % msg)
4377

    
4378

    
4379
class LURebootInstance(LogicalUnit):
4380
  """Reboot an instance.
4381

4382
  """
4383
  HPATH = "instance-reboot"
4384
  HTYPE = constants.HTYPE_INSTANCE
4385
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4386
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4387
  REQ_BGL = False
4388

    
4389
  def CheckArguments(self):
4390
    if self.op.reboot_type not in constants.REBOOT_TYPES:
4391
      raise errors.OpPrereqError("Invalid reboot type '%s', not one of %s" %
4392
                                  (self.op.reboot_type,
4393
                                   utils.CommaJoin(constants.REBOOT_TYPES)),
4394
                                 errors.ECODE_INVAL)
4395

    
4396
  def ExpandNames(self):
4397
    self._ExpandAndLockInstance()
4398

    
4399
  def BuildHooksEnv(self):
4400
    """Build hooks env.
4401

4402
    This runs on master, primary and secondary nodes of the instance.
4403

4404
    """
4405
    env = {
4406
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4407
      "REBOOT_TYPE": self.op.reboot_type,
4408
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4409
      }
4410
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4411
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4412
    return env, nl, nl
4413

    
4414
  def CheckPrereq(self):
4415
    """Check prerequisites.
4416

4417
    This checks that the instance is in the cluster.
4418

4419
    """
4420
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4421
    assert self.instance is not None, \
4422
      "Cannot retrieve locked instance %s" % self.op.instance_name
4423

    
4424
    _CheckNodeOnline(self, instance.primary_node)
4425

    
4426
    # check bridges existence
4427
    _CheckInstanceBridgesExist(self, instance)
4428

    
4429
  def Exec(self, feedback_fn):
4430
    """Reboot the instance.
4431

4432
    """
4433
    instance = self.instance
4434
    ignore_secondaries = self.op.ignore_secondaries
4435
    reboot_type = self.op.reboot_type
4436

    
4437
    node_current = instance.primary_node
4438

    
4439
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4440
                       constants.INSTANCE_REBOOT_HARD]:
4441
      for disk in instance.disks:
4442
        self.cfg.SetDiskID(disk, node_current)
4443
      result = self.rpc.call_instance_reboot(node_current, instance,
4444
                                             reboot_type,
4445
                                             self.op.shutdown_timeout)
4446
      result.Raise("Could not reboot instance")
4447
    else:
4448
      result = self.rpc.call_instance_shutdown(node_current, instance,
4449
                                               self.op.shutdown_timeout)
4450
      result.Raise("Could not shutdown instance for full reboot")
4451
      _ShutdownInstanceDisks(self, instance)
4452
      _StartInstanceDisks(self, instance, ignore_secondaries)
4453
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4454
      msg = result.fail_msg
4455
      if msg:
4456
        _ShutdownInstanceDisks(self, instance)
4457
        raise errors.OpExecError("Could not start instance for"
4458
                                 " full reboot: %s" % msg)
4459

    
4460
    self.cfg.MarkInstanceUp(instance.name)
4461

    
4462

    
4463
class LUShutdownInstance(LogicalUnit):
4464
  """Shutdown an instance.
4465

4466
  """
4467
  HPATH = "instance-stop"
4468
  HTYPE = constants.HTYPE_INSTANCE
4469
  _OP_REQP = ["instance_name"]
4470
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4471
  REQ_BGL = False
4472

    
4473
  def ExpandNames(self):
4474
    self._ExpandAndLockInstance()
4475

    
4476
  def BuildHooksEnv(self):
4477
    """Build hooks env.
4478

4479
    This runs on master, primary and secondary nodes of the instance.
4480

4481
    """
4482
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4483
    env["TIMEOUT"] = self.op.timeout
4484
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4485
    return env, nl, nl
4486

    
4487
  def CheckPrereq(self):
4488
    """Check prerequisites.
4489

4490
    This checks that the instance is in the cluster.
4491

4492
    """
4493
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4494
    assert self.instance is not None, \
4495
      "Cannot retrieve locked instance %s" % self.op.instance_name
4496
    _CheckNodeOnline(self, self.instance.primary_node)
4497

    
4498
  def Exec(self, feedback_fn):
4499
    """Shutdown the instance.
4500

4501
    """
4502
    instance = self.instance
4503
    node_current = instance.primary_node
4504
    timeout = self.op.timeout
4505
    self.cfg.MarkInstanceDown(instance.name)
4506
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4507
    msg = result.fail_msg
4508
    if msg:
4509
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4510

    
4511
    _ShutdownInstanceDisks(self, instance)
4512

    
4513

    
4514
class LUReinstallInstance(LogicalUnit):
4515
  """Reinstall an instance.
4516

4517
  """
4518
  HPATH = "instance-reinstall"
4519
  HTYPE = constants.HTYPE_INSTANCE
4520
  _OP_REQP = ["instance_name"]
4521
  _OP_DEFS = [
4522
    ("os_type", None),
4523
    ("force_variant", False),
4524
    ]
4525
  REQ_BGL = False
4526

    
4527
  def ExpandNames(self):
4528
    self._ExpandAndLockInstance()
4529

    
4530
  def BuildHooksEnv(self):
4531
    """Build hooks env.
4532

4533
    This runs on master, primary and secondary nodes of the instance.
4534

4535
    """
4536
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4537
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4538
    return env, nl, nl
4539

    
4540
  def CheckPrereq(self):
4541
    """Check prerequisites.
4542

4543
    This checks that the instance is in the cluster and is not running.
4544

4545
    """
4546
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4547
    assert instance is not None, \
4548
      "Cannot retrieve locked instance %s" % self.op.instance_name
4549
    _CheckNodeOnline(self, instance.primary_node)
4550

    
4551
    if instance.disk_template == constants.DT_DISKLESS:
4552
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4553
                                 self.op.instance_name,
4554
                                 errors.ECODE_INVAL)
4555
    _CheckInstanceDown(self, instance, "cannot reinstall")
4556

    
4557
    if self.op.os_type is not None:
4558
      # OS verification
4559
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4560
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4561

    
4562
    self.instance = instance
4563

    
4564
  def Exec(self, feedback_fn):
4565
    """Reinstall the instance.
4566

4567
    """
4568
    inst = self.instance
4569

    
4570
    if self.op.os_type is not None:
4571
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4572
      inst.os = self.op.os_type
4573
      self.cfg.Update(inst, feedback_fn)
4574

    
4575
    _StartInstanceDisks(self, inst, None)
4576
    try:
4577
      feedback_fn("Running the instance OS create scripts...")
4578
      # FIXME: pass debug option from opcode to backend
4579
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4580
                                             self.op.debug_level)
4581
      result.Raise("Could not install OS for instance %s on node %s" %
4582
                   (inst.name, inst.primary_node))
4583
    finally:
4584
      _ShutdownInstanceDisks(self, inst)
4585

    
4586

    
4587
class LURecreateInstanceDisks(LogicalUnit):
4588
  """Recreate an instance's missing disks.
4589

4590
  """
4591
  HPATH = "instance-recreate-disks"
4592
  HTYPE = constants.HTYPE_INSTANCE
4593
  _OP_REQP = ["instance_name", "disks"]
4594
  REQ_BGL = False
4595

    
4596
  def CheckArguments(self):
4597
    """Check the arguments.
4598

4599
    """
4600
    if not isinstance(self.op.disks, list):
4601
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4602
    for item in self.op.disks:
4603
      if (not isinstance(item, int) or
4604
          item < 0):
4605
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4606
                                   str(item), errors.ECODE_INVAL)
4607

    
4608
  def ExpandNames(self):
4609
    self._ExpandAndLockInstance()
4610

    
4611
  def BuildHooksEnv(self):
4612
    """Build hooks env.
4613

4614
    This runs on master, primary and secondary nodes of the instance.
4615

4616
    """
4617
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4618
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4619
    return env, nl, nl
4620

    
4621
  def CheckPrereq(self):
4622
    """Check prerequisites.
4623

4624
    This checks that the instance is in the cluster and is not running.
4625

4626
    """
4627
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4628
    assert instance is not None, \
4629
      "Cannot retrieve locked instance %s" % self.op.instance_name
4630
    _CheckNodeOnline(self, instance.primary_node)
4631

    
4632
    if instance.disk_template == constants.DT_DISKLESS:
4633
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4634
                                 self.op.instance_name, errors.ECODE_INVAL)
4635
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4636

    
4637
    if not self.op.disks:
4638
      self.op.disks = range(len(instance.disks))
4639
    else:
4640
      for idx in self.op.disks:
4641
        if idx >= len(instance.disks):
4642
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4643
                                     errors.ECODE_INVAL)
4644

    
4645
    self.instance = instance
4646

    
4647
  def Exec(self, feedback_fn):
4648
    """Recreate the disks.
4649

4650
    """
4651
    to_skip = []
4652
    for idx, _ in enumerate(self.instance.disks):
4653
      if idx not in self.op.disks: # disk idx has not been passed in
4654
        to_skip.append(idx)
4655
        continue
4656

    
4657
    _CreateDisks(self, self.instance, to_skip=to_skip)
4658

    
4659

    
4660
class LURenameInstance(LogicalUnit):
4661
  """Rename an instance.
4662

4663
  """
4664
  HPATH = "instance-rename"
4665
  HTYPE = constants.HTYPE_INSTANCE
4666
  _OP_REQP = ["instance_name", "new_name"]
4667
  _OP_DEFS = [("ignore_ip", False)]
4668

    
4669
  def BuildHooksEnv(self):
4670
    """Build hooks env.
4671

4672
    This runs on master, primary and secondary nodes of the instance.
4673

4674
    """
4675
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4676
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4677
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4678
    return env, nl, nl
4679

    
4680
  def CheckPrereq(self):
4681
    """Check prerequisites.
4682

4683
    This checks that the instance is in the cluster and is not running.
4684

4685
    """
4686
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4687
                                                self.op.instance_name)
4688
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4689
    assert instance is not None
4690
    _CheckNodeOnline(self, instance.primary_node)
4691
    _CheckInstanceDown(self, instance, "cannot rename")
4692
    self.instance = instance
4693

    
4694
    # new name verification
4695
    name_info = utils.GetHostInfo(self.op.new_name)
4696

    
4697
    self.op.new_name = new_name = name_info.name
4698
    instance_list = self.cfg.GetInstanceList()
4699
    if new_name in instance_list:
4700
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4701
                                 new_name, errors.ECODE_EXISTS)
4702

    
4703
    if not self.op.ignore_ip:
4704
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4705
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4706
                                   (name_info.ip, new_name),
4707
                                   errors.ECODE_NOTUNIQUE)
4708

    
4709

    
4710
  def Exec(self, feedback_fn):
4711
    """Reinstall the instance.
4712

4713
    """
4714
    inst = self.instance
4715
    old_name = inst.name
4716

    
4717
    if inst.disk_template == constants.DT_FILE:
4718
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4719

    
4720
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4721
    # Change the instance lock. This is definitely safe while we hold the BGL
4722
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4723
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4724

    
4725
    # re-read the instance from the configuration after rename
4726
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4727

    
4728
    if inst.disk_template == constants.DT_FILE:
4729
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4730
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4731
                                                     old_file_storage_dir,
4732
                                                     new_file_storage_dir)
4733
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4734
                   " (but the instance has been renamed in Ganeti)" %
4735
                   (inst.primary_node, old_file_storage_dir,
4736
                    new_file_storage_dir))
4737

    
4738
    _StartInstanceDisks(self, inst, None)
4739
    try:
4740
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4741
                                                 old_name, self.op.debug_level)
4742
      msg = result.fail_msg
4743
      if msg:
4744
        msg = ("Could not run OS rename script for instance %s on node %s"
4745
               " (but the instance has been renamed in Ganeti): %s" %
4746
               (inst.name, inst.primary_node, msg))
4747
        self.proc.LogWarning(msg)
4748
    finally:
4749
      _ShutdownInstanceDisks(self, inst)
4750

    
4751

    
4752
class LURemoveInstance(LogicalUnit):
4753
  """Remove an instance.
4754

4755
  """
4756
  HPATH = "instance-remove"
4757
  HTYPE = constants.HTYPE_INSTANCE
4758
  _OP_REQP = ["instance_name", "ignore_failures"]
4759
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4760
  REQ_BGL = False
4761

    
4762
  def ExpandNames(self):
4763
    self._ExpandAndLockInstance()
4764
    self.needed_locks[locking.LEVEL_NODE] = []
4765
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4766

    
4767
  def DeclareLocks(self, level):
4768
    if level == locking.LEVEL_NODE:
4769
      self._LockInstancesNodes()
4770

    
4771
  def BuildHooksEnv(self):
4772
    """Build hooks env.
4773

4774
    This runs on master, primary and secondary nodes of the instance.
4775

4776
    """
4777
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4778
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4779
    nl = [self.cfg.GetMasterNode()]
4780
    nl_post = list(self.instance.all_nodes) + nl
4781
    return env, nl, nl_post
4782

    
4783
  def CheckPrereq(self):
4784
    """Check prerequisites.
4785

4786
    This checks that the instance is in the cluster.
4787

4788
    """
4789
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4790
    assert self.instance is not None, \
4791
      "Cannot retrieve locked instance %s" % self.op.instance_name
4792

    
4793
  def Exec(self, feedback_fn):
4794
    """Remove the instance.
4795

4796
    """
4797
    instance = self.instance
4798
    logging.info("Shutting down instance %s on node %s",
4799
                 instance.name, instance.primary_node)
4800

    
4801
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4802
                                             self.op.shutdown_timeout)
4803
    msg = result.fail_msg
4804
    if msg:
4805
      if self.op.ignore_failures:
4806
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
4807
      else:
4808
        raise errors.OpExecError("Could not shutdown instance %s on"
4809
                                 " node %s: %s" %
4810
                                 (instance.name, instance.primary_node, msg))
4811

    
4812
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4813

    
4814

    
4815
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4816
  """Utility function to remove an instance.
4817

4818
  """
4819
  logging.info("Removing block devices for instance %s", instance.name)
4820

    
4821
  if not _RemoveDisks(lu, instance):
4822
    if not ignore_failures:
4823
      raise errors.OpExecError("Can't remove instance's disks")
4824
    feedback_fn("Warning: can't remove instance's disks")
4825

    
4826
  logging.info("Removing instance %s out of cluster config", instance.name)
4827

    
4828
  lu.cfg.RemoveInstance(instance.name)
4829

    
4830
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4831
    "Instance lock removal conflict"
4832

    
4833
  # Remove lock for the instance
4834
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4835

    
4836

    
4837
class LUQueryInstances(NoHooksLU):
4838
  """Logical unit for querying instances.
4839

4840
  """
4841
  # pylint: disable-msg=W0142
4842
  _OP_REQP = ["output_fields", "names", "use_locking"]
4843
  REQ_BGL = False
4844
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4845
                    "serial_no", "ctime", "mtime", "uuid"]
4846
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4847
                                    "admin_state",
4848
                                    "disk_template", "ip", "mac", "bridge",
4849
                                    "nic_mode", "nic_link",
4850
                                    "sda_size", "sdb_size", "vcpus", "tags",
4851
                                    "network_port", "beparams",
4852
                                    r"(disk)\.(size)/([0-9]+)",
4853
                                    r"(disk)\.(sizes)", "disk_usage",
4854
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4855
                                    r"(nic)\.(bridge)/([0-9]+)",
4856
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
4857
                                    r"(disk|nic)\.(count)",
4858
                                    "hvparams",
4859
                                    ] + _SIMPLE_FIELDS +
4860
                                  ["hv/%s" % name
4861
                                   for name in constants.HVS_PARAMETERS
4862
                                   if name not in constants.HVC_GLOBALS] +
4863
                                  ["be/%s" % name
4864
                                   for name in constants.BES_PARAMETERS])
4865
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4866

    
4867

    
4868
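  # Note on the regexp-based fields above (examples, not an exhaustive list):
  # a pattern such as r"(disk)\.(size)/([0-9]+)" matches per-index fields, so
  # "disk.size/0" returns the size of the first disk and "nic.mac/1" the MAC
  # of the second NIC, while the aggregate forms ("disk.sizes", "nic.macs",
  # ...) return one list with an entry per disk or NIC.
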
  def CheckArguments(self):
4869
    _CheckOutputFields(static=self._FIELDS_STATIC,
4870
                       dynamic=self._FIELDS_DYNAMIC,
4871
                       selected=self.op.output_fields)
4872

    
4873
  def ExpandNames(self):
4874
    self.needed_locks = {}
4875
    self.share_locks[locking.LEVEL_INSTANCE] = 1
4876
    self.share_locks[locking.LEVEL_NODE] = 1
4877

    
4878
    if self.op.names:
4879
      self.wanted = _GetWantedInstances(self, self.op.names)
4880
    else:
4881
      self.wanted = locking.ALL_SET
4882

    
4883
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4884
    self.do_locking = self.do_node_query and self.op.use_locking
4885
    if self.do_locking:
4886
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4887
      self.needed_locks[locking.LEVEL_NODE] = []
4888
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4889

    
4890
  def DeclareLocks(self, level):
4891
    if level == locking.LEVEL_NODE and self.do_locking:
4892
      self._LockInstancesNodes()
4893

    
4894
  def Exec(self, feedback_fn):
4895
    """Computes the list of nodes and their attributes.
4896

4897
    """
4898
    # pylint: disable-msg=R0912
4899
    # way too many branches here
4900
    all_info = self.cfg.GetAllInstancesInfo()
4901
    if self.wanted == locking.ALL_SET:
4902
      # caller didn't specify instance names, so ordering is not important
4903
      if self.do_locking:
4904
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4905
      else:
4906
        instance_names = all_info.keys()
4907
      instance_names = utils.NiceSort(instance_names)
4908
    else:
4909
      # caller did specify names, so we must keep the ordering
4910
      if self.do_locking:
4911
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4912
      else:
4913
        tgt_set = all_info.keys()
4914
      missing = set(self.wanted).difference(tgt_set)
4915
      if missing:
4916
        raise errors.OpExecError("Some instances were removed before"
4917
                                 " retrieving their data: %s" % missing)
4918
      instance_names = self.wanted
4919

    
4920
    instance_list = [all_info[iname] for iname in instance_names]
4921

    
4922
    # begin data gathering
4923

    
4924
    nodes = frozenset([inst.primary_node for inst in instance_list])
4925
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4926

    
4927
    bad_nodes = []
4928
    off_nodes = []
4929
    if self.do_node_query:
4930
      live_data = {}
4931
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4932
      for name in nodes:
4933
        result = node_data[name]
4934
        if result.offline:
4935
          # offline nodes will be in both lists
4936
          off_nodes.append(name)
4937
        if result.fail_msg:
4938
          bad_nodes.append(name)
4939
        else:
4940
          if result.payload:
4941
            live_data.update(result.payload)
4942
          # else no instance is alive
4943
    else:
4944
      live_data = dict([(name, {}) for name in instance_names])
4945

    
4946
    # end data gathering
4947

    
4948
    HVPREFIX = "hv/"
4949
    BEPREFIX = "be/"
4950
    output = []
4951
    cluster = self.cfg.GetClusterInfo()
4952
    for instance in instance_list:
4953
      iout = []
4954
      i_hv = cluster.FillHV(instance, skip_globals=True)
4955
      i_be = cluster.FillBE(instance)
4956
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
4957
      for field in self.op.output_fields:
4958
        st_match = self._FIELDS_STATIC.Matches(field)
4959
        if field in self._SIMPLE_FIELDS:
4960
          val = getattr(instance, field)
4961
        elif field == "pnode":
4962
          val = instance.primary_node
4963
        elif field == "snodes":
4964
          val = list(instance.secondary_nodes)
4965
        elif field == "admin_state":
4966
          val = instance.admin_up
4967
        elif field == "oper_state":
4968
          if instance.primary_node in bad_nodes:
4969
            val = None
4970
          else:
4971
            val = bool(live_data.get(instance.name))
4972
        elif field == "status":
4973
          if instance.primary_node in off_nodes:
4974
            val = "ERROR_nodeoffline"
4975
          elif instance.primary_node in bad_nodes:
4976
            val = "ERROR_nodedown"
4977
          else:
4978
            running = bool(live_data.get(instance.name))
4979
            if running:
4980
              if instance.admin_up:
4981
                val = "running"
4982
              else:
4983
                val = "ERROR_up"
4984
            else:
4985
              if instance.admin_up:
4986
                val = "ERROR_down"
4987
              else:
4988
                val = "ADMIN_down"
4989
        elif field == "oper_ram":
4990
          if instance.primary_node in bad_nodes:
4991
            val = None
4992
          elif instance.name in live_data:
4993
            val = live_data[instance.name].get("memory", "?")
4994
          else:
4995
            val = "-"
4996
        elif field == "vcpus":
4997
          val = i_be[constants.BE_VCPUS]
4998
        elif field == "disk_template":
4999
          val = instance.disk_template
5000
        elif field == "ip":
5001
          if instance.nics:
5002
            val = instance.nics[0].ip
5003
          else:
5004
            val = None
5005
        elif field == "nic_mode":
5006
          if instance.nics:
5007
            val = i_nicp[0][constants.NIC_MODE]
5008
          else:
5009
            val = None
5010
        elif field == "nic_link":
5011
          if instance.nics:
5012
            val = i_nicp[0][constants.NIC_LINK]
5013
          else:
5014
            val = None
5015
        elif field == "bridge":
5016
          if (instance.nics and
5017
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5018
            val = i_nicp[0][constants.NIC_LINK]
5019
          else:
5020
            val = None
5021
        elif field == "mac":
5022
          if instance.nics:
5023
            val = instance.nics[0].mac
5024
          else:
5025
            val = None
5026
        elif field == "sda_size" or field == "sdb_size":
5027
          idx = ord(field[2]) - ord('a')
5028
          try:
5029
            val = instance.FindDisk(idx).size
5030
          except errors.OpPrereqError:
5031
            val = None
5032
        elif field == "disk_usage": # total disk usage per node
5033
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
5034
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5035
        elif field == "tags":
5036
          val = list(instance.GetTags())
5037
        elif field == "hvparams":
5038
          val = i_hv
5039
        elif (field.startswith(HVPREFIX) and
5040
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5041
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5042
          val = i_hv.get(field[len(HVPREFIX):], None)
5043
        elif field == "beparams":
5044
          val = i_be
5045
        elif (field.startswith(BEPREFIX) and
5046
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5047
          val = i_be.get(field[len(BEPREFIX):], None)
5048
        elif st_match and st_match.groups():
5049
          # matches a variable list
5050
          st_groups = st_match.groups()
5051
          if st_groups and st_groups[0] == "disk":
5052
            if st_groups[1] == "count":
5053
              val = len(instance.disks)
5054
            elif st_groups[1] == "sizes":
5055
              val = [disk.size for disk in instance.disks]
5056
            elif st_groups[1] == "size":
5057
              try:
5058
                val = instance.FindDisk(st_groups[2]).size
5059
              except errors.OpPrereqError:
5060
                val = None
5061
            else:
5062
              assert False, "Unhandled disk parameter"
5063
          elif st_groups[0] == "nic":
5064
            if st_groups[1] == "count":
5065
              val = len(instance.nics)
5066
            elif st_groups[1] == "macs":
5067
              val = [nic.mac for nic in instance.nics]
5068
            elif st_groups[1] == "ips":
5069
              val = [nic.ip for nic in instance.nics]
5070
            elif st_groups[1] == "modes":
5071
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5072
            elif st_groups[1] == "links":
5073
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5074
            elif st_groups[1] == "bridges":
5075
              val = []
5076
              for nicp in i_nicp:
5077
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5078
                  val.append(nicp[constants.NIC_LINK])
5079
                else:
5080
                  val.append(None)
5081
            else:
5082
              # index-based item
5083
              nic_idx = int(st_groups[2])
5084
              if nic_idx >= len(instance.nics):
5085
                val = None
5086
              else:
5087
                if st_groups[1] == "mac":
5088
                  val = instance.nics[nic_idx].mac
5089
                elif st_groups[1] == "ip":
5090
                  val = instance.nics[nic_idx].ip
5091
                elif st_groups[1] == "mode":
5092
                  val = i_nicp[nic_idx][constants.NIC_MODE]
5093
                elif st_groups[1] == "link":
5094
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5095
                elif st_groups[1] == "bridge":
5096
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5097
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5098
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5099
                  else:
5100
                    val = None
5101
                else:
5102
                  assert False, "Unhandled NIC parameter"
5103
          else:
5104
            assert False, ("Declared but unhandled variable parameter '%s'" %
5105
                           field)
5106
        else:
5107
          assert False, "Declared but unhandled parameter '%s'" % field
5108
        iout.append(val)
5109
      output.append(iout)
5110

    
5111
    return output
5112

    
5113

    
5114
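# Shape of the LUQueryInstances result (hypothetical names, for illustration):
# with output_fields=["name", "pnode", "status"], Exec above returns one row
# per instance, e.g.
#   [["inst1.example.com", "node1.example.com", "running"],
#    ["inst2.example.com", "node2.example.com", "ADMIN_down"]]

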
class LUFailoverInstance(LogicalUnit):
5115
  """Failover an instance.
5116

5117
  """
5118
  HPATH = "instance-failover"
5119
  HTYPE = constants.HTYPE_INSTANCE
5120
  _OP_REQP = ["instance_name", "ignore_consistency"]
5121
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5122
  REQ_BGL = False
5123

    
5124
  def ExpandNames(self):
5125
    self._ExpandAndLockInstance()
5126
    self.needed_locks[locking.LEVEL_NODE] = []
5127
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5128

    
5129
  def DeclareLocks(self, level):
5130
    if level == locking.LEVEL_NODE:
5131
      self._LockInstancesNodes()
5132

    
5133
  def BuildHooksEnv(self):
5134
    """Build hooks env.
5135

5136
    This runs on master, primary and secondary nodes of the instance.
5137

5138
    """
5139
    instance = self.instance
5140
    source_node = instance.primary_node
5141
    target_node = instance.secondary_nodes[0]
5142
    env = {
5143
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5144
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5145
      "OLD_PRIMARY": source_node,
5146
      "OLD_SECONDARY": target_node,
5147
      "NEW_PRIMARY": target_node,
5148
      "NEW_SECONDARY": source_node,
5149
      }
5150
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5151
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5152
    nl_post = list(nl)
5153
    nl_post.append(source_node)
5154
    return env, nl, nl_post
5155

    
5156
  def CheckPrereq(self):
5157
    """Check prerequisites.
5158

5159
    This checks that the instance is in the cluster.
5160

5161
    """
5162
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5163
    assert self.instance is not None, \
5164
      "Cannot retrieve locked instance %s" % self.op.instance_name
5165

    
5166
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5167
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5168
      raise errors.OpPrereqError("Instance's disk layout is not"
5169
                                 " network mirrored, cannot failover.",
5170
                                 errors.ECODE_STATE)
5171

    
5172
    secondary_nodes = instance.secondary_nodes
5173
    if not secondary_nodes:
5174
      raise errors.ProgrammerError("no secondary node but using "
5175
                                   "a mirrored disk template")
5176

    
5177
    target_node = secondary_nodes[0]
5178
    _CheckNodeOnline(self, target_node)
5179
    _CheckNodeNotDrained(self, target_node)
5180
    if instance.admin_up:
5181
      # check memory requirements on the secondary node
5182
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5183
                           instance.name, bep[constants.BE_MEMORY],
5184
                           instance.hypervisor)
5185
    else:
5186
      self.LogInfo("Not checking memory on the secondary node as"
5187
                   " instance will not be started")
5188

    
5189
    # check bridge existence
5190
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5191

    
5192
  def Exec(self, feedback_fn):
5193
    """Failover an instance.
5194

5195
    The failover is done by shutting it down on its present node and
5196
    starting it on the secondary.
5197

5198
    """
5199
    instance = self.instance
5200

    
5201
    source_node = instance.primary_node
5202
    target_node = instance.secondary_nodes[0]
5203

    
5204
    if instance.admin_up:
5205
      feedback_fn("* checking disk consistency between source and target")
5206
      for dev in instance.disks:
5207
        # for drbd, these are drbd over lvm
5208
        if not _CheckDiskConsistency(self, dev, target_node, False):
5209
          if not self.op.ignore_consistency:
5210
            raise errors.OpExecError("Disk %s is degraded on target node,"
5211
                                     " aborting failover." % dev.iv_name)
5212
    else:
5213
      feedback_fn("* not checking disk consistency as instance is not running")
5214

    
5215
    feedback_fn("* shutting down instance on source node")
5216
    logging.info("Shutting down instance %s on node %s",
5217
                 instance.name, source_node)
5218

    
5219
    result = self.rpc.call_instance_shutdown(source_node, instance,
5220
                                             self.op.shutdown_timeout)
5221
    msg = result.fail_msg
5222
    if msg:
5223
      if self.op.ignore_consistency:
5224
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5225
                             " Proceeding anyway. Please make sure node"
5226
                             " %s is down. Error details: %s",
5227
                             instance.name, source_node, source_node, msg)
5228
      else:
5229
        raise errors.OpExecError("Could not shutdown instance %s on"
5230
                                 " node %s: %s" %
5231
                                 (instance.name, source_node, msg))
5232

    
5233
    feedback_fn("* deactivating the instance's disks on source node")
5234
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5235
      raise errors.OpExecError("Can't shut down the instance's disks.")
5236

    
5237
    instance.primary_node = target_node
5238
    # distribute new instance config to the other nodes
5239
    self.cfg.Update(instance, feedback_fn)
5240

    
5241
    # Only start the instance if it's marked as up
5242
    if instance.admin_up:
5243
      feedback_fn("* activating the instance's disks on target node")
5244
      logging.info("Starting instance %s on node %s",
5245
                   instance.name, target_node)
5246

    
5247
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5248
                                           ignore_secondaries=True)
5249
      if not disks_ok:
5250
        _ShutdownInstanceDisks(self, instance)
5251
        raise errors.OpExecError("Can't activate the instance's disks")
5252

    
5253
      feedback_fn("* starting the instance on the target node")
5254
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5255
      msg = result.fail_msg
5256
      if msg:
5257
        _ShutdownInstanceDisks(self, instance)
5258
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5259
                                 (instance.name, target_node, msg))
5260

    
5261

    
5262
class LUMigrateInstance(LogicalUnit):
5263
  """Migrate an instance.
5264

5265
  This is migration without shutting down, compared to the failover,
5266
  which is done with shutdown.
5267

5268
  """
5269
  HPATH = "instance-migrate"
5270
  HTYPE = constants.HTYPE_INSTANCE
5271
  _OP_REQP = ["instance_name", "live", "cleanup"]
5272

    
5273
  REQ_BGL = False
5274

    
5275
  def ExpandNames(self):
5276
    self._ExpandAndLockInstance()
5277

    
5278
    self.needed_locks[locking.LEVEL_NODE] = []
5279
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5280

    
5281
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5282
                                       self.op.live, self.op.cleanup)
5283
    self.tasklets = [self._migrater]
5284

    
5285
  def DeclareLocks(self, level):
5286
    if level == locking.LEVEL_NODE:
5287
      self._LockInstancesNodes()
5288

    
5289
  def BuildHooksEnv(self):
5290
    """Build hooks env.
5291

5292
    This runs on master, primary and secondary nodes of the instance.
5293

5294
    """
5295
    instance = self._migrater.instance
5296
    source_node = instance.primary_node
5297
    target_node = instance.secondary_nodes[0]
5298
    env = _BuildInstanceHookEnvByObject(self, instance)
5299
    env["MIGRATE_LIVE"] = self.op.live
5300
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5301
    env.update({
5302
        "OLD_PRIMARY": source_node,
5303
        "OLD_SECONDARY": target_node,
5304
        "NEW_PRIMARY": target_node,
5305
        "NEW_SECONDARY": source_node,
5306
        })
5307
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5308
    nl_post = list(nl)
5309
    nl_post.append(source_node)
5310
    return env, nl, nl_post
5311

    
5312

    
5313
class LUMoveInstance(LogicalUnit):
5314
  """Move an instance by data-copying.
5315

5316
  """
5317
  HPATH = "instance-move"
5318
  HTYPE = constants.HTYPE_INSTANCE
5319
  _OP_REQP = ["instance_name", "target_node"]
5320
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5321
  REQ_BGL = False
5322

    
5323
  def ExpandNames(self):
5324
    self._ExpandAndLockInstance()
5325
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5326
    self.op.target_node = target_node
5327
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5328
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5329

    
5330
  def DeclareLocks(self, level):
5331
    if level == locking.LEVEL_NODE:
5332
      self._LockInstancesNodes(primary_only=True)
5333

    
5334
  def BuildHooksEnv(self):
5335
    """Build hooks env.
5336

5337
    This runs on master, primary and secondary nodes of the instance.
5338

5339
    """
5340
    env = {
5341
      "TARGET_NODE": self.op.target_node,
5342
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5343
      }
5344
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5345
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5346
                                       self.op.target_node]
5347
    return env, nl, nl
5348

    
5349
  def CheckPrereq(self):
5350
    """Check prerequisites.
5351

5352
    This checks that the instance is in the cluster.
5353

5354
    """
5355
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5356
    assert self.instance is not None, \
5357
      "Cannot retrieve locked instance %s" % self.op.instance_name
5358

    
5359
    node = self.cfg.GetNodeInfo(self.op.target_node)
5360
    assert node is not None, \
5361
      "Cannot retrieve locked node %s" % self.op.target_node
5362

    
5363
    self.target_node = target_node = node.name
5364

    
5365
    if target_node == instance.primary_node:
5366
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5367
                                 (instance.name, target_node),
5368
                                 errors.ECODE_STATE)
5369

    
5370
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5371

    
5372
    for idx, dsk in enumerate(instance.disks):
5373
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5374
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5375
                                   " cannot copy" % idx, errors.ECODE_STATE)
5376

    
5377
    _CheckNodeOnline(self, target_node)
5378
    _CheckNodeNotDrained(self, target_node)
5379

    
5380
    if instance.admin_up:
5381
      # check memory requirements on the secondary node
5382
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5383
                           instance.name, bep[constants.BE_MEMORY],
5384
                           instance.hypervisor)
5385
    else:
5386
      self.LogInfo("Not checking memory on the secondary node as"
5387
                   " instance will not be started")
5388

    
5389
    # check bridge existence
5390
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5391

    
5392
  def Exec(self, feedback_fn):
5393
    """Move an instance.
5394

5395
    The move is done by shutting it down on its present node, copying
5396
    the data over (slow) and starting it on the new node.
5397

5398
    """
5399
    instance = self.instance
5400

    
5401
    source_node = instance.primary_node
5402
    target_node = self.target_node
5403

    
5404
    self.LogInfo("Shutting down instance %s on source node %s",
5405
                 instance.name, source_node)
5406

    
5407
    result = self.rpc.call_instance_shutdown(source_node, instance,
5408
                                             self.op.shutdown_timeout)
5409
    msg = result.fail_msg
5410
    if msg:
5411
      if self.op.ignore_consistency:
5412
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5413
                             " Proceeding anyway. Please make sure node"
5414
                             " %s is down. Error details: %s",
5415
                             instance.name, source_node, source_node, msg)
5416
      else:
5417
        raise errors.OpExecError("Could not shutdown instance %s on"
5418
                                 " node %s: %s" %
5419
                                 (instance.name, source_node, msg))
5420

    
5421
    # create the target disks
5422
    try:
5423
      _CreateDisks(self, instance, target_node=target_node)
5424
    except errors.OpExecError:
5425
      self.LogWarning("Device creation failed, reverting...")
5426
      try:
5427
        _RemoveDisks(self, instance, target_node=target_node)
5428
      finally:
5429
        self.cfg.ReleaseDRBDMinors(instance.name)
5430
        raise
5431

    
5432
    cluster_name = self.cfg.GetClusterInfo().cluster_name
5433

    
5434
    errs = []
5435
    # activate, get path, copy the data over
5436
    for idx, disk in enumerate(instance.disks):
5437
      self.LogInfo("Copying data for disk %d", idx)
5438
      result = self.rpc.call_blockdev_assemble(target_node, disk,
5439
                                               instance.name, True)
5440
      if result.fail_msg:
5441
        self.LogWarning("Can't assemble newly created disk %d: %s",
5442
                        idx, result.fail_msg)
5443
        errs.append(result.fail_msg)
5444
        break
5445
      dev_path = result.payload
5446
      result = self.rpc.call_blockdev_export(source_node, disk,
5447
                                             target_node, dev_path,
5448
                                             cluster_name)
5449
      if result.fail_msg:
5450
        self.LogWarning("Can't copy data over for disk %d: %s",
5451
                        idx, result.fail_msg)
5452
        errs.append(result.fail_msg)
5453
        break
5454

    
5455
    if errs:
5456
      self.LogWarning("Some disks failed to copy, aborting")
5457
      try:
5458
        _RemoveDisks(self, instance, target_node=target_node)
5459
      finally:
5460
        self.cfg.ReleaseDRBDMinors(instance.name)
5461
        raise errors.OpExecError("Errors during disk copy: %s" %
5462
                                 (",".join(errs),))
5463

    
5464
    instance.primary_node = target_node
5465
    self.cfg.Update(instance, feedback_fn)
5466

    
5467
    self.LogInfo("Removing the disks on the original node")
5468
    _RemoveDisks(self, instance, target_node=source_node)
5469

    
5470
    # Only start the instance if it's marked as up
5471
    if instance.admin_up:
5472
      self.LogInfo("Starting instance %s on node %s",
5473
                   instance.name, target_node)
5474

    
5475
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5476
                                           ignore_secondaries=True)
5477
      if not disks_ok:
5478
        _ShutdownInstanceDisks(self, instance)
5479
        raise errors.OpExecError("Can't activate the instance's disks")
5480

    
5481
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5482
      msg = result.fail_msg
5483
      if msg:
5484
        _ShutdownInstanceDisks(self, instance)
5485
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5486
                                 (instance.name, target_node, msg))
5487

    
5488

    
5489
class LUMigrateNode(LogicalUnit):
5490
  """Migrate all instances from a node.
5491

5492
  """
5493
  HPATH = "node-migrate"
5494
  HTYPE = constants.HTYPE_NODE
5495
  _OP_REQP = ["node_name", "live"]
5496
  REQ_BGL = False
5497

    
5498
  def ExpandNames(self):
5499
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5500

    
5501
    self.needed_locks = {
5502
      locking.LEVEL_NODE: [self.op.node_name],
5503
      }
5504

    
5505
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5506

    
5507
    # Create tasklets for migrating instances for all instances on this node
5508
    names = []
5509
    tasklets = []
5510

    
5511
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5512
      logging.debug("Migrating instance %s", inst.name)
5513
      names.append(inst.name)
5514

    
5515
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5516

    
5517
    self.tasklets = tasklets
5518

    
5519
    # Declare instance locks
5520
    self.needed_locks[locking.LEVEL_INSTANCE] = names
5521

    
5522
  def DeclareLocks(self, level):
5523
    if level == locking.LEVEL_NODE:
5524
      self._LockInstancesNodes()
5525

    
5526
  def BuildHooksEnv(self):
5527
    """Build hooks env.
5528

5529
    This runs on the master, the primary and all the secondaries.
5530

5531
    """
5532
    env = {
5533
      "NODE_NAME": self.op.node_name,
5534
      }
5535

    
5536
    nl = [self.cfg.GetMasterNode()]
5537

    
5538
    return (env, nl, nl)
5539

    
5540

    
5541
class TLMigrateInstance(Tasklet):
5542
  def __init__(self, lu, instance_name, live, cleanup):
5543
    """Initializes this class.
5544

5545
    """
5546
    Tasklet.__init__(self, lu)
5547

    
5548
    # Parameters
5549
    self.instance_name = instance_name
5550
    self.live = live
5551
    self.cleanup = cleanup
5552

    
5553
  def CheckPrereq(self):
5554
    """Check prerequisites.
5555

5556
    This checks that the instance is in the cluster.
5557

5558
    """
5559
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5560
    instance = self.cfg.GetInstanceInfo(instance_name)
5561
    assert instance is not None
5562

    
5563
    if instance.disk_template != constants.DT_DRBD8:
5564
      raise errors.OpPrereqError("Instance's disk layout is not"
5565
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5566

    
5567
    secondary_nodes = instance.secondary_nodes
5568
    if not secondary_nodes:
5569
      raise errors.ConfigurationError("No secondary node but using"
5570
                                      " drbd8 disk template")
5571

    
5572
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5573

    
5574
    target_node = secondary_nodes[0]
5575
    # check memory requirements on the secondary node
5576
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5577
                         instance.name, i_be[constants.BE_MEMORY],
5578
                         instance.hypervisor)
5579

    
5580
    # check bridge existence
5581
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5582

    
5583
    if not self.cleanup:
5584
      _CheckNodeNotDrained(self.lu, target_node)
5585
      result = self.rpc.call_instance_migratable(instance.primary_node,
5586
                                                 instance)
5587
      result.Raise("Can't migrate, please use failover",
5588
                   prereq=True, ecode=errors.ECODE_STATE)
5589

    
5590
    self.instance = instance
5591

    
5592
  def _WaitUntilSync(self):
5593
    """Poll with custom rpc for disk sync.
5594

5595
    This uses our own step-based rpc call.
5596

5597
    """
5598
    self.feedback_fn("* wait until resync is done")
5599
    all_done = False
5600
    while not all_done:
5601
      all_done = True
5602
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5603
                                            self.nodes_ip,
5604
                                            self.instance.disks)
5605
      min_percent = 100
5606
      for node, nres in result.items():
5607
        nres.Raise("Cannot resync disks on node %s" % node)
5608
        node_done, node_percent = nres.payload
5609
        all_done = all_done and node_done
5610
        if node_percent is not None:
5611
          min_percent = min(min_percent, node_percent)
5612
      if not all_done:
5613
        if min_percent < 100:
5614
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5615
        time.sleep(2)
5616

    
5617
  def _EnsureSecondary(self, node):
5618
    """Demote a node to secondary.
5619

5620
    """
5621
    self.feedback_fn("* switching node %s to secondary mode" % node)
5622

    
5623
    for dev in self.instance.disks:
5624
      self.cfg.SetDiskID(dev, node)
5625

    
5626
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5627
                                          self.instance.disks)
5628
    result.Raise("Cannot change disk to secondary on node %s" % node)
5629

    
5630
  def _GoStandalone(self):
5631
    """Disconnect from the network.
5632

5633
    """
5634
    self.feedback_fn("* changing into standalone mode")
5635
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5636
                                               self.instance.disks)
5637
    for node, nres in result.items():
5638
      nres.Raise("Cannot disconnect disks node %s" % node)
5639

    
5640
  def _GoReconnect(self, multimaster):
5641
    """Reconnect to the network.
5642

5643
    """
5644
    if multimaster:
5645
      msg = "dual-master"
5646
    else:
5647
      msg = "single-master"
5648
    self.feedback_fn("* changing disks into %s mode" % msg)
5649
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5650
                                           self.instance.disks,
5651
                                           self.instance.name, multimaster)
5652
    for node, nres in result.items():
5653
      nres.Raise("Cannot change disks config on node %s" % node)
5654

    
5655
  def _ExecCleanup(self):
5656
    """Try to cleanup after a failed migration.
5657

5658
    The cleanup is done by:
5659
      - check that the instance is running only on one node
5660
        (and update the config if needed)
5661
      - change disks on its secondary node to secondary
5662
      - wait until disks are fully synchronized
5663
      - disconnect from the network
5664
      - change disks into single-master mode
5665
      - wait again until disks are fully synchronized
5666

5667
    """
5668
    instance = self.instance
5669
    target_node = self.target_node
5670
    source_node = self.source_node
5671

    
5672
    # check running on only one node
5673
    self.feedback_fn("* checking where the instance actually runs"
5674
                     " (if this hangs, the hypervisor might be in"
5675
                     " a bad state)")
5676
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5677
    for node, result in ins_l.items():
5678
      result.Raise("Can't contact node %s" % node)
5679

    
5680
    runningon_source = instance.name in ins_l[source_node].payload
5681
    runningon_target = instance.name in ins_l[target_node].payload
5682

    
5683
    if runningon_source and runningon_target:
5684
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5685
                               " or the hypervisor is confused. You will have"
5686
                               " to ensure manually that it runs only on one"
5687
                               " and restart this operation.")
5688

    
5689
    if not (runningon_source or runningon_target):
5690
      raise errors.OpExecError("Instance does not seem to be running at all."
5691
                               " In this case, it's safer to repair by"
5692
                               " running 'gnt-instance stop' to ensure disk"
5693
                               " shutdown, and then restarting it.")
5694

    
5695
    if runningon_target:
5696
      # the migration has actually succeeded, we need to update the config
5697
      self.feedback_fn("* instance running on secondary node (%s),"
5698
                       " updating config" % target_node)
5699
      instance.primary_node = target_node
5700
      self.cfg.Update(instance, self.feedback_fn)
5701
      demoted_node = source_node
5702
    else:
5703
      self.feedback_fn("* instance confirmed to be running on its"
5704
                       " primary node (%s)" % source_node)
5705
      demoted_node = target_node
5706

    
5707
    self._EnsureSecondary(demoted_node)
5708
    try:
5709
      self._WaitUntilSync()
5710
    except errors.OpExecError:
5711
      # we ignore errors here, since if the device is standalone, it
5712
      # won't be able to sync
5713
      pass
5714
    self._GoStandalone()
5715
    self._GoReconnect(False)
5716
    self._WaitUntilSync()
5717

    
5718
    self.feedback_fn("* done")
5719

    
5720
  def _RevertDiskStatus(self):
5721
    """Try to revert the disk status after a failed migration.
5722

5723
    """
5724
    target_node = self.target_node
5725
    try:
5726
      self._EnsureSecondary(target_node)
5727
      self._GoStandalone()
5728
      self._GoReconnect(False)
5729
      self._WaitUntilSync()
5730
    except errors.OpExecError, err:
5731
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5732
                         " drives: error '%s'\n"
5733
                         "Please look and recover the instance status" %
5734
                         str(err))
5735

    
5736
  def _AbortMigration(self):
5737
    """Call the hypervisor code to abort a started migration.
5738

5739
    """
5740
    instance = self.instance
5741
    target_node = self.target_node
5742
    migration_info = self.migration_info
5743

    
5744
    abort_result = self.rpc.call_finalize_migration(target_node,
5745
                                                    instance,
5746
                                                    migration_info,
5747
                                                    False)
5748
    abort_msg = abort_result.fail_msg
5749
    if abort_msg:
5750
      logging.error("Aborting migration failed on target node %s: %s",
5751
                    target_node, abort_msg)
5752
      # Don't raise an exception here, as we still have to try to revert the
5753
      # disk status, even if this step failed.
5754

    
5755
  def _ExecMigration(self):
5756
    """Migrate an instance.
5757

5758
    The migrate is done by:
5759
      - change the disks into dual-master mode
5760
      - wait until disks are fully synchronized again
5761
      - migrate the instance
5762
      - change disks on the new secondary node (the old primary) to secondary
5763
      - wait until disks are fully synchronized
5764
      - change disks into single-master mode
5765

5766
    """
5767
    instance = self.instance
5768
    target_node = self.target_node
5769
    source_node = self.source_node
5770

    
5771
    self.feedback_fn("* checking disk consistency between source and target")
5772
    for dev in instance.disks:
5773
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5774
        raise errors.OpExecError("Disk %s is degraded or not fully"
5775
                                 " synchronized on target node,"
5776
                                 " aborting migrate." % dev.iv_name)
5777

    
5778
    # First get the migration information from the remote node
5779
    result = self.rpc.call_migration_info(source_node, instance)
5780
    msg = result.fail_msg
5781
    if msg:
5782
      log_err = ("Failed fetching source migration information from %s: %s" %
5783
                 (source_node, msg))
5784
      logging.error(log_err)
5785
      raise errors.OpExecError(log_err)
5786

    
5787
    self.migration_info = migration_info = result.payload
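    # Note: for a live migration the target node needs access to the disks
    # while the source is still running; with DRBD this is done by running
    # both sides as primaries temporarily, hence the reconnect in multimaster
    # (dual-master) mode below, reverted once the instance has moved.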
5788

    
5789
    # Then switch the disks to master/master mode
5790
    self._EnsureSecondary(target_node)
5791
    self._GoStandalone()
5792
    self._GoReconnect(True)
5793
    self._WaitUntilSync()
5794

    
5795
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5796
    result = self.rpc.call_accept_instance(target_node,
5797
                                           instance,
5798
                                           migration_info,
5799
                                           self.nodes_ip[target_node])
5800

    
5801
    msg = result.fail_msg
5802
    if msg:
5803
      logging.error("Instance pre-migration failed, trying to revert"
5804
                    " disk status: %s", msg)
5805
      self.feedback_fn("Pre-migration failed, aborting")
5806
      self._AbortMigration()
5807
      self._RevertDiskStatus()
5808
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5809
                               (instance.name, msg))
5810

    
5811
    self.feedback_fn("* migrating instance to %s" % target_node)
5812
    time.sleep(10)
5813
    result = self.rpc.call_instance_migrate(source_node, instance,
5814
                                            self.nodes_ip[target_node],
5815
                                            self.live)
5816
    msg = result.fail_msg
5817
    if msg:
5818
      logging.error("Instance migration failed, trying to revert"
5819
                    " disk status: %s", msg)
5820
      self.feedback_fn("Migration failed, aborting")
5821
      self._AbortMigration()
5822
      self._RevertDiskStatus()
5823
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5824
                               (instance.name, msg))
5825
    time.sleep(10)
5826

    
5827
    instance.primary_node = target_node
5828
    # distribute new instance config to the other nodes
5829
    self.cfg.Update(instance, self.feedback_fn)
5830

    
5831
    result = self.rpc.call_finalize_migration(target_node,
5832
                                              instance,
5833
                                              migration_info,
5834
                                              True)
5835
    msg = result.fail_msg
5836
    if msg:
5837
      logging.error("Instance migration succeeded, but finalization failed:"
5838
                    " %s", msg)
5839
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5840
                               msg)
5841

    
5842
    self._EnsureSecondary(source_node)
5843
    self._WaitUntilSync()
5844
    self._GoStandalone()
5845
    self._GoReconnect(False)
5846
    self._WaitUntilSync()
5847

    
5848
    self.feedback_fn("* done")
5849

    
5850
  def Exec(self, feedback_fn):
5851
    """Perform the migration.
5852

5853
    """
5854
    feedback_fn("Migrating instance %s" % self.instance.name)
5855

    
5856
    self.feedback_fn = feedback_fn
5857

    
5858
    self.source_node = self.instance.primary_node
5859
    self.target_node = self.instance.secondary_nodes[0]
5860
    self.all_nodes = [self.source_node, self.target_node]
5861
    self.nodes_ip = {
5862
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5863
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5864
      }
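    # both the DRBD reconfiguration and the migration RPCs below use the
    # nodes' secondary (replication) IP addresses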
5865

    
5866
    if self.cleanup:
5867
      return self._ExecCleanup()
5868
    else:
5869
      return self._ExecMigration()
5870

    
5871

    
5872
def _CreateBlockDev(lu, node, instance, device, force_create,
5873
                    info, force_open):
5874
  """Create a tree of block devices on a given node.
5875

5876
  If this device type has to be created on secondaries, create it and
5877
  all its children.
5878

5879
  If not, just recurse to children keeping the same 'force' value.
5880

5881
  @param lu: the lu on whose behalf we execute
5882
  @param node: the node on which to create the device
5883
  @type instance: L{objects.Instance}
5884
  @param instance: the instance which owns the device
5885
  @type device: L{objects.Disk}
5886
  @param device: the device to create
5887
  @type force_create: boolean
5888
  @param force_create: whether to force creation of this device; this
5889
      will be changed to True whenever we find a device which has
5890
      CreateOnSecondary() attribute
5891
  @param info: the extra 'metadata' we should attach to the device
5892
      (this will be represented as a LVM tag)
5893
  @type force_open: boolean
5894
  @param force_open: this parameter will be passed to the
5895
      L{backend.BlockdevCreate} function where it specifies
5896
      whether we run on primary or not, and it affects both
5897
      the child assembly and the device own Open() execution
5898

5899
  """
5900
  if device.CreateOnSecondary():
5901
    force_create = True
5902

    
5903
  if device.children:
5904
    for child in device.children:
5905
      _CreateBlockDev(lu, node, instance, child, force_create,
5906
                      info, force_open)
5907

    
5908
  if not force_create:
5909
    return
5910

    
5911
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5912

    
5913

    
5914
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5915
  """Create a single block device on a given node.
5916

5917
  This will not recurse over children of the device, so they must be
5918
  created in advance.
5919

5920
  @param lu: the lu on whose behalf we execute
5921
  @param node: the node on which to create the device
5922
  @type instance: L{objects.Instance}
5923
  @param instance: the instance which owns the device
5924
  @type device: L{objects.Disk}
5925
  @param device: the device to create
5926
  @param info: the extra 'metadata' we should attach to the device
5927
      (this will be represented as a LVM tag)
5928
  @type force_open: boolean
5929
  @param force_open: this parameter will be passed to the
5930
      L{backend.BlockdevCreate} function where it specifies
5931
      whether we run on primary or not, and it affects both
5932
      the child assembly and the device own Open() execution
5933

5934
  """
5935
  lu.cfg.SetDiskID(device, node)
5936
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5937
                                       instance.name, force_open, info)
5938
  result.Raise("Can't create block device %s on"
5939
               " node %s for instance %s" % (device, node, instance.name))
5940
  if device.physical_id is None:
5941
    device.physical_id = result.payload
5942

    
5943

    
5944
def _GenerateUniqueNames(lu, exts):
5945
  """Generate a suitable LV name.
5946

5947
  This will generate a logical volume name for the given instance.
5948

5949
  """
5950
  results = []
5951
  for val in exts:
5952
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5953
    results.append("%s%s" % (new_id, val))
5954
  return results
5955

    
5956

    
5957
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5958
                         p_minor, s_minor):
5959
  """Generate a drbd8 device complete with its children.
5960

5961
  """
5962
  port = lu.cfg.AllocatePort()
5963
  vgname = lu.cfg.GetVGName()
5964
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5965
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5966
                          logical_id=(vgname, names[0]))
5967
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5968
                          logical_id=(vgname, names[1]))
5969
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5970
                          logical_id=(primary, secondary, port,
5971
                                      p_minor, s_minor,
5972
                                      shared_secret),
5973
                          children=[dev_data, dev_meta],
5974
                          iv_name=iv_name)
5975
  return drbd_dev
5976

    
5977

    
5978
def _GenerateDiskTemplate(lu, template_name,
5979
                          instance_name, primary_node,
5980
                          secondary_nodes, disk_info,
5981
                          file_storage_dir, file_driver,
5982
                          base_index):
5983
  """Generate the entire disk layout for a given template type.
5984

5985
  """
5986
  #TODO: compute space requirements
5987

    
5988
  vgname = lu.cfg.GetVGName()
5989
  disk_count = len(disk_info)
5990
  disks = []
5991
  if template_name == constants.DT_DISKLESS:
5992
    pass
5993
  elif template_name == constants.DT_PLAIN:
5994
    if len(secondary_nodes) != 0:
5995
      raise errors.ProgrammerError("Wrong template configuration")
5996

    
5997
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5998
                                      for i in range(disk_count)])
5999
    for idx, disk in enumerate(disk_info):
6000
      disk_index = idx + base_index
6001
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6002
                              logical_id=(vgname, names[idx]),
6003
                              iv_name="disk/%d" % disk_index,
6004
                              mode=disk["mode"])
6005
      disks.append(disk_dev)
6006
  elif template_name == constants.DT_DRBD8:
6007
    if len(secondary_nodes) != 1:
6008
      raise errors.ProgrammerError("Wrong template configuration")
6009
    remote_node = secondary_nodes[0]
6010
    minors = lu.cfg.AllocateDRBDMinor(
6011
      [primary_node, remote_node] * len(disk_info), instance_name)
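    # the minors are requested with the node pair repeated once per disk, so
    # they come back interleaved: minors[2*i] is used on the primary and
    # minors[2*i + 1] on the secondary node of disk i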
6012

    
6013
    names = []
6014
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6015
                                               for i in range(disk_count)]):
6016
      names.append(lv_prefix + "_data")
6017
      names.append(lv_prefix + "_meta")
6018
    for idx, disk in enumerate(disk_info):
6019
      disk_index = idx + base_index
6020
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6021
                                      disk["size"], names[idx*2:idx*2+2],
6022
                                      "disk/%d" % disk_index,
6023
                                      minors[idx*2], minors[idx*2+1])
6024
      disk_dev.mode = disk["mode"]
6025
      disks.append(disk_dev)
6026
  elif template_name == constants.DT_FILE:
6027
    if len(secondary_nodes) != 0:
6028
      raise errors.ProgrammerError("Wrong template configuration")
6029

    
6030
    _RequireFileStorage()
6031

    
6032
    for idx, disk in enumerate(disk_info):
6033
      disk_index = idx + base_index
6034
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6035
                              iv_name="disk/%d" % disk_index,
6036
                              logical_id=(file_driver,
6037
                                          "%s/disk%d" % (file_storage_dir,
6038
                                                         disk_index)),
6039
                              mode=disk["mode"])
6040
      disks.append(disk_dev)
6041
  else:
6042
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6043
  return disks
6044

    
6045

    
6046
def _GetInstanceInfoText(instance):
6047
  """Compute that text that should be added to the disk's metadata.
6048

6049
  """
6050
  return "originstname+%s" % instance.name
6051

    
6052

    
6053
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6054
  """Create all disks for an instance.
6055

6056
  This abstracts away some work from AddInstance.
6057

6058
  @type lu: L{LogicalUnit}
6059
  @param lu: the logical unit on whose behalf we execute
6060
  @type instance: L{objects.Instance}
6061
  @param instance: the instance whose disks we should create
6062
  @type to_skip: list
6063
  @param to_skip: list of indices to skip
6064
  @type target_node: string
6065
  @param target_node: if passed, overrides the target node for creation
6066
  @rtype: boolean
6067
  @return: the success of the creation
6068

6069
  """
6070
  info = _GetInstanceInfoText(instance)
6071
  if target_node is None:
6072
    pnode = instance.primary_node
6073
    all_nodes = instance.all_nodes
6074
  else:
6075
    pnode = target_node
6076
    all_nodes = [pnode]
6077

    
6078
  if instance.disk_template == constants.DT_FILE:
6079
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6080
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6081

    
6082
    result.Raise("Failed to create directory '%s' on"
6083
                 " node %s" % (file_storage_dir, pnode))
6084

    
6085
  # Note: this needs to be kept in sync with adding of disks in
6086
  # LUSetInstanceParams
6087
  for idx, device in enumerate(instance.disks):
6088
    if to_skip and idx in to_skip:
6089
      continue
6090
    logging.info("Creating volume %s for instance %s",
6091
                 device.iv_name, instance.name)
6092
    #HARDCODE
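    # creation (and opening) is forced only on the primary node; on the
    # other nodes _CreateBlockDev only creates the components that report
    # CreateOnSecondary(), e.g. the LVs backing a DRBD device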
6093
    for node in all_nodes:
6094
      f_create = node == pnode
6095
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6096

    
6097

    
6098
def _RemoveDisks(lu, instance, target_node=None):
6099
  """Remove all disks for an instance.
6100

6101
  This abstracts away some work from `AddInstance()` and
6102
  `RemoveInstance()`. Note that in case some of the devices couldn't
6103
  be removed, the removal will continue with the other ones (compare
6104
  with `_CreateDisks()`).
6105

6106
  @type lu: L{LogicalUnit}
6107
  @param lu: the logical unit on whose behalf we execute
6108
  @type instance: L{objects.Instance}
6109
  @param instance: the instance whose disks we should remove
6110
  @type target_node: string
6111
  @param target_node: used to override the node on which to remove the disks
6112
  @rtype: boolean
6113
  @return: the success of the removal
6114

6115
  """
6116
  logging.info("Removing block devices for instance %s", instance.name)
6117

    
6118
  all_result = True
6119
  for device in instance.disks:
6120
    if target_node:
6121
      edata = [(target_node, device)]
6122
    else:
6123
      edata = device.ComputeNodeTree(instance.primary_node)
6124
    for node, disk in edata:
6125
      lu.cfg.SetDiskID(disk, node)
6126
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6127
      if msg:
6128
        lu.LogWarning("Could not remove block device %s on node %s,"
6129
                      " continuing anyway: %s", device.iv_name, node, msg)
6130
        all_result = False
6131

    
6132
  if instance.disk_template == constants.DT_FILE:
6133
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6134
    if target_node:
6135
      tgt = target_node
6136
    else:
6137
      tgt = instance.primary_node
6138
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6139
    if result.fail_msg:
6140
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6141
                    file_storage_dir, instance.primary_node, result.fail_msg)
6142
      all_result = False
6143

    
6144
  return all_result
6145

    
6146

    
6147
def _ComputeDiskSize(disk_template, disks):
6148
  """Compute disk size requirements in the volume group
6149

6150
  """
6151
  # Required free disk space as a function of disk and swap space
6152
  req_size_dict = {
6153
    constants.DT_DISKLESS: None,
6154
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6155
    # 128 MB are added for drbd metadata for each disk
6156
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6157
    constants.DT_FILE: None,
6158
  }
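  # worked example: for DT_DRBD8 with two disks of 10240 and 2048 MiB this
  # yields (10240 + 128) + (2048 + 128) = 12544 MiB of volume group space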
6159

    
6160
  if disk_template not in req_size_dict:
6161
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6162
                                 " is unknown" %  disk_template)
6163

    
6164
  return req_size_dict[disk_template]
6165

    
6166

    
6167
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6168
  """Hypervisor parameter validation.
6169

6170
  This function abstracts the hypervisor parameter validation to be
6171
  used in both instance create and instance modify.
6172

6173
  @type lu: L{LogicalUnit}
6174
  @param lu: the logical unit for which we check
6175
  @type nodenames: list
6176
  @param nodenames: the list of nodes on which we should check
6177
  @type hvname: string
6178
  @param hvname: the name of the hypervisor we should use
6179
  @type hvparams: dict
6180
  @param hvparams: the parameters which we need to check
6181
  @raise errors.OpPrereqError: if the parameters are not valid
6182

6183
  """
6184
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6185
                                                  hvname,
6186
                                                  hvparams)
6187
  for node in nodenames:
6188
    info = hvinfo[node]
6189
    if info.offline:
6190
      continue
6191
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6192

    
6193

    
6194
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6195
  """OS parameters validation.
6196

6197
  @type lu: L{LogicalUnit}
6198
  @param lu: the logical unit for which we check
6199
  @type required: boolean
6200
  @param required: whether the validation should fail if the OS is not
6201
      found
6202
  @type nodenames: list
6203
  @param nodenames: the list of nodes on which we should check
6204
  @type osname: string
6205
  @param osname: the name of the OS we should use
6206
  @type osparams: dict
6207
  @param osparams: the parameters which we need to check
6208
  @raise errors.OpPrereqError: if the parameters are not valid
6209

6210
  """
6211
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6212
                                   [constants.OS_VALIDATE_PARAMETERS],
6213
                                   osparams)
6214
  for node, nres in result.items():
6215
    # we don't check for offline cases since this should be run only
6216
    # against the master node and/or an instance's nodes
6217
    nres.Raise("OS Parameters validation failed on node %s" % node)
6218
    if not nres.payload:
6219
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6220
                 osname, node)
6221

    
6222

    
6223
class LUCreateInstance(LogicalUnit):
6224
  """Create an instance.
6225

6226
  """
6227
  HPATH = "instance-add"
6228
  HTYPE = constants.HTYPE_INSTANCE
6229
  _OP_REQP = ["instance_name", "disks",
6230
              "mode", "start",
6231
              "wait_for_sync", "ip_check", "nics",
6232
              "hvparams", "beparams", "osparams"]
6233
  _OP_DEFS = [
6234
    ("name_check", True),
6235
    ("no_install", False),
6236
    ("os_type", None),
6237
    ("force_variant", False),
6238
    ("source_handshake", None),
6239
    ("source_x509_ca", None),
6240
    ("source_instance_name", None),
6241
    ("src_node", None),
6242
    ("src_path", None),
6243
    ("pnode", None),
6244
    ("snode", None),
6245
    ("iallocator", None),
6246
    ("hypervisor", None),
6247
    ("disk_template", None),
6248
    ("identify_defaults", None),
6249
    ]
6250
  REQ_BGL = False
6251

    
6252
  def CheckArguments(self):
6253
    """Check arguments.
6254

6255
    """
6256
    # do not require name_check to ease forward/backward compatibility
6257
    # for tools
6258
    if self.op.no_install and self.op.start:
6259
      self.LogInfo("No-installation mode selected, disabling startup")
6260
      self.op.start = False
6261
    # validate/normalize the instance name
6262
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6263
    if self.op.ip_check and not self.op.name_check:
6264
      # TODO: make the ip check more flexible and not depend on the name check
6265
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
6266
                                 errors.ECODE_INVAL)
6267

    
6268
    # check nics' parameter names
6269
    for nic in self.op.nics:
6270
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6271

    
6272
    # check disks. parameter names and consistent adopt/no-adopt strategy
6273
    has_adopt = has_no_adopt = False
6274
    for disk in self.op.disks:
6275
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6276
      if "adopt" in disk:
6277
        has_adopt = True
6278
      else:
6279
        has_no_adopt = True
6280
    if has_adopt and has_no_adopt:
6281
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6282
                                 errors.ECODE_INVAL)
6283
    if has_adopt:
6284
      if self.op.disk_template != constants.DT_PLAIN:
6285
        raise errors.OpPrereqError("Disk adoption is only supported for the"
6286
                                   " 'plain' disk template",
6287
                                   errors.ECODE_INVAL)
6288
      if self.op.iallocator is not None:
6289
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6290
                                   " iallocator script", errors.ECODE_INVAL)
6291
      if self.op.mode == constants.INSTANCE_IMPORT:
6292
        raise errors.OpPrereqError("Disk adoption not allowed for"
6293
                                   " instance import", errors.ECODE_INVAL)
6294

    
6295
    self.adopt_disks = has_adopt
6296

    
6297
    # verify creation mode
6298
    if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6299
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6300
                                 self.op.mode, errors.ECODE_INVAL)
6301

    
6302
    # instance name verification
6303
    if self.op.name_check:
6304
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6305
      self.op.instance_name = self.hostname1.name
6306
      # used in CheckPrereq for ip ping check
6307
      self.check_ip = self.hostname1.ip
6308
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6309
      raise errors.OpPrereqError("Remote imports require names to be checked",
6310
                                 errors.ECODE_INVAL)
6311
    else:
6312
      self.check_ip = None
6313

    
6314
    # file storage checks
6315
    if (self.op.file_driver and
6316
        not self.op.file_driver in constants.FILE_DRIVER):
6317
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6318
                                 self.op.file_driver, errors.ECODE_INVAL)
6319

    
6320
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6321
      raise errors.OpPrereqError("File storage directory path not absolute",
6322
                                 errors.ECODE_INVAL)
6323

    
6324
    ### Node/iallocator related checks
6325
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
6326
      raise errors.OpPrereqError("One and only one of iallocator and primary"
6327
                                 " node must be given",
6328
                                 errors.ECODE_INVAL)
6329

    
6330
    self._cds = _GetClusterDomainSecret()
6331

    
6332
    if self.op.mode == constants.INSTANCE_IMPORT:
6333
      # On import force_variant must be True, because if we forced it at
6334
      # initial install, our only chance when importing it back is that it
6335
      # works again!
6336
      self.op.force_variant = True
6337

    
6338
      if self.op.no_install:
6339
        self.LogInfo("No-installation mode has no effect during import")
6340

    
6341
    elif self.op.mode == constants.INSTANCE_CREATE:
6342
      if self.op.os_type is None:
6343
        raise errors.OpPrereqError("No guest OS specified",
6344
                                   errors.ECODE_INVAL)
6345
      if self.op.disk_template is None:
6346
        raise errors.OpPrereqError("No disk template specified",
6347
                                   errors.ECODE_INVAL)
6348

    
6349
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6350
      # Check handshake to ensure both clusters have the same domain secret
6351
      src_handshake = self.op.source_handshake
6352
      if not src_handshake:
6353
        raise errors.OpPrereqError("Missing source handshake",
6354
                                   errors.ECODE_INVAL)
6355

    
6356
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6357
                                                           src_handshake)
6358
      if errmsg:
6359
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6360
                                   errors.ECODE_INVAL)
6361

    
6362
      # Load and check source CA
6363
      self.source_x509_ca_pem = self.op.source_x509_ca
6364
      if not self.source_x509_ca_pem:
6365
        raise errors.OpPrereqError("Missing source X509 CA",
6366
                                   errors.ECODE_INVAL)
6367

    
6368
      try:
6369
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6370
                                                    self._cds)
6371
      except OpenSSL.crypto.Error, err:
6372
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6373
                                   (err, ), errors.ECODE_INVAL)
6374

    
6375
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6376
      if errcode is not None:
6377
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6378
                                   errors.ECODE_INVAL)
6379

    
6380
      self.source_x509_ca = cert
6381

    
6382
      src_instance_name = self.op.source_instance_name
6383
      if not src_instance_name:
6384
        raise errors.OpPrereqError("Missing source instance name",
6385
                                   errors.ECODE_INVAL)
6386

    
6387
      self.source_instance_name = \
6388
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6389

    
6390
    else:
6391
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6392
                                 self.op.mode, errors.ECODE_INVAL)
6393

    
6394
  def ExpandNames(self):
6395
    """ExpandNames for CreateInstance.
6396

6397
    Figure out the right locks for instance creation.
6398

6399
    """
6400
    self.needed_locks = {}
6401

    
6402
    instance_name = self.op.instance_name
6403
    # this is just a preventive check, but someone might still add this
6404
    # instance in the meantime, and creation will fail at lock-add time
6405
    if instance_name in self.cfg.GetInstanceList():
6406
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6407
                                 instance_name, errors.ECODE_EXISTS)
6408

    
6409
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6410

    
6411
    if self.op.iallocator:
6412
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6413
    else:
6414
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6415
      nodelist = [self.op.pnode]
6416
      if self.op.snode is not None:
6417
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6418
        nodelist.append(self.op.snode)
6419
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6420

    
6421
    # in case of import lock the source node too
6422
    if self.op.mode == constants.INSTANCE_IMPORT:
6423
      src_node = self.op.src_node
6424
      src_path = self.op.src_path
6425

    
6426
      if src_path is None:
6427
        self.op.src_path = src_path = self.op.instance_name
6428

    
6429
      if src_node is None:
6430
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6431
        self.op.src_node = None
6432
        if os.path.isabs(src_path):
6433
          raise errors.OpPrereqError("Importing an instance from an absolute"
6434
                                     " path requires a source node option.",
6435
                                     errors.ECODE_INVAL)
6436
      else:
6437
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6438
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6439
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6440
        if not os.path.isabs(src_path):
6441
          self.op.src_path = src_path = \
6442
            utils.PathJoin(constants.EXPORT_DIR, src_path)
6443

    
6444
  def _RunAllocator(self):
6445
    """Run the allocator based on input opcode.
6446

6447
    """
6448
    nics = [n.ToDict() for n in self.nics]
6449
    ial = IAllocator(self.cfg, self.rpc,
6450
                     mode=constants.IALLOCATOR_MODE_ALLOC,
6451
                     name=self.op.instance_name,
6452
                     disk_template=self.op.disk_template,
6453
                     tags=[],
6454
                     os=self.op.os_type,
6455
                     vcpus=self.be_full[constants.BE_VCPUS],
6456
                     mem_size=self.be_full[constants.BE_MEMORY],
6457
                     disks=self.disks,
6458
                     nics=nics,
6459
                     hypervisor=self.op.hypervisor,
6460
                     )
6461

    
6462
    ial.Run(self.op.iallocator)
6463

    
6464
    if not ial.success:
6465
      raise errors.OpPrereqError("Can't compute nodes using"
6466
                                 " iallocator '%s': %s" %
6467
                                 (self.op.iallocator, ial.info),
6468
                                 errors.ECODE_NORES)
6469
    if len(ial.result) != ial.required_nodes:
6470
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6471
                                 " of nodes (%s), required %s" %
6472
                                 (self.op.iallocator, len(ial.result),
6473
                                  ial.required_nodes), errors.ECODE_FAULT)
6474
    self.op.pnode = ial.result[0]
6475
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6476
                 self.op.instance_name, self.op.iallocator,
6477
                 utils.CommaJoin(ial.result))
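    # for mirrored disk templates the allocator must return two nodes; the
    # second entry of its result becomes the secondary node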
6478
    if ial.required_nodes == 2:
6479
      self.op.snode = ial.result[1]
6480

    
6481
  def BuildHooksEnv(self):
6482
    """Build hooks env.
6483

6484
    This runs on master, primary and secondary nodes of the instance.
6485

6486
    """
6487
    env = {
6488
      "ADD_MODE": self.op.mode,
6489
      }
6490
    if self.op.mode == constants.INSTANCE_IMPORT:
6491
      env["SRC_NODE"] = self.op.src_node
6492
      env["SRC_PATH"] = self.op.src_path
6493
      env["SRC_IMAGES"] = self.src_images
6494

    
6495
    env.update(_BuildInstanceHookEnv(
6496
      name=self.op.instance_name,
6497
      primary_node=self.op.pnode,
6498
      secondary_nodes=self.secondaries,
6499
      status=self.op.start,
6500
      os_type=self.op.os_type,
6501
      memory=self.be_full[constants.BE_MEMORY],
6502
      vcpus=self.be_full[constants.BE_VCPUS],
6503
      nics=_NICListToTuple(self, self.nics),
6504
      disk_template=self.op.disk_template,
6505
      disks=[(d["size"], d["mode"]) for d in self.disks],
6506
      bep=self.be_full,
6507
      hvp=self.hv_full,
6508
      hypervisor_name=self.op.hypervisor,
6509
    ))
6510

    
6511
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6512
          self.secondaries)
6513
    return env, nl, nl
6514

    
6515
  def _ReadExportInfo(self):
6516
    """Reads the export information from disk.
6517

6518
    It will override the opcode source node and path with the actual
6519
    information, if these two were not specified before.
6520

6521
    @return: the export information
6522

6523
    """
6524
    assert self.op.mode == constants.INSTANCE_IMPORT
6525

    
6526
    src_node = self.op.src_node
6527
    src_path = self.op.src_path
6528

    
6529
    if src_node is None:
6530
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6531
      exp_list = self.rpc.call_export_list(locked_nodes)
6532
      found = False
6533
      for node in exp_list:
6534
        if exp_list[node].fail_msg:
6535
          continue
6536
        if src_path in exp_list[node].payload:
6537
          found = True
6538
          self.op.src_node = src_node = node
6539
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6540
                                                       src_path)
6541
          break
6542
      if not found:
6543
        raise errors.OpPrereqError("No export found for relative path %s" %
6544
                                    src_path, errors.ECODE_INVAL)
6545

    
6546
    _CheckNodeOnline(self, src_node)
6547
    result = self.rpc.call_export_info(src_node, src_path)
6548
    result.Raise("No export or invalid export found in dir %s" % src_path)
6549

    
6550
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6551
    if not export_info.has_section(constants.INISECT_EXP):
6552
      raise errors.ProgrammerError("Corrupted export config",
6553
                                   errors.ECODE_ENVIRON)
6554

    
6555
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6556
    if (int(ei_version) != constants.EXPORT_VERSION):
6557
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6558
                                 (ei_version, constants.EXPORT_VERSION),
6559
                                 errors.ECODE_ENVIRON)
6560
    return export_info
6561

    
6562
  def _ReadExportParams(self, einfo):
6563
    """Use export parameters as defaults.
6564

6565
    In case the opcode doesn't specify (i.e. override) some instance
6566
    parameters, try to use them from the export information, if it
6567
    declares them.
6568

6569
    """
6570
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6571

    
6572
    if self.op.disk_template is None:
6573
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6574
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6575
                                          "disk_template")
6576
      else:
6577
        raise errors.OpPrereqError("No disk template specified and the export"
6578
                                   " is missing the disk_template information",
6579
                                   errors.ECODE_INVAL)
6580

    
6581
    if not self.op.disks:
6582
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6583
        disks = []
6584
        # TODO: import the disk iv_name too
6585
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6586
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6587
          disks.append({"size": disk_sz})
6588
        self.op.disks = disks
6589
      else:
6590
        raise errors.OpPrereqError("No disk info specified and the export"
6591
                                   " is missing the disk information",
6592
                                   errors.ECODE_INVAL)
6593

    
6594
    if (not self.op.nics and
6595
        einfo.has_option(constants.INISECT_INS, "nic_count")):
6596
      nics = []
6597
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6598
        ndict = {}
6599
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6600
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6601
          ndict[name] = v
6602
        nics.append(ndict)
6603
      self.op.nics = nics
6604

    
6605
    if (self.op.hypervisor is None and
6606
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6607
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6608
    if einfo.has_section(constants.INISECT_HYP):
6609
      # use the export parameters but do not override the ones
6610
      # specified by the user
6611
      for name, value in einfo.items(constants.INISECT_HYP):
6612
        if name not in self.op.hvparams:
6613
          self.op.hvparams[name] = value
6614

    
6615
    if einfo.has_section(constants.INISECT_BEP):
6616
      # use the parameters, without overriding
6617
      for name, value in einfo.items(constants.INISECT_BEP):
6618
        if name not in self.op.beparams:
6619
          self.op.beparams[name] = value
6620
    else:
6621
      # try to read the parameters old style, from the main section
6622
      for name in constants.BES_PARAMETERS:
6623
        if (name not in self.op.beparams and
6624
            einfo.has_option(constants.INISECT_INS, name)):
6625
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6626

    
6627
    if einfo.has_section(constants.INISECT_OSP):
6628
      # use the parameters, without overriding
6629
      for name, value in einfo.items(constants.INISECT_OSP):
6630
        if name not in self.op.osparams:
6631
          self.op.osparams[name] = value
6632

    
6633
  def _RevertToDefaults(self, cluster):
6634
    """Revert the instance parameters to the default values.
6635

6636
    """
6637
    # hvparams
6638
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6639
    for name in self.op.hvparams.keys():
6640
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6641
        del self.op.hvparams[name]
6642
    # beparams
6643
    be_defs = cluster.SimpleFillBE({})
6644
    for name in self.op.beparams.keys():
6645
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6646
        del self.op.beparams[name]
6647
    # nic params
6648
    nic_defs = cluster.SimpleFillNIC({})
6649
    for nic in self.op.nics:
6650
      for name in constants.NICS_PARAMETERS:
6651
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6652
          del nic[name]
6653
    # osparams
6654
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6655
    for name in self.op.osparams.keys():
6656
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6657
        del self.op.osparams[name]
6658

    
6659
  def CheckPrereq(self):
6660
    """Check prerequisites.
6661

6662
    """
6663
    if self.op.mode == constants.INSTANCE_IMPORT:
6664
      export_info = self._ReadExportInfo()
6665
      self._ReadExportParams(export_info)
6666

    
6667
    _CheckDiskTemplate(self.op.disk_template)
6668

    
6669
    if (not self.cfg.GetVGName() and
6670
        self.op.disk_template not in constants.DTS_NOT_LVM):
6671
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6672
                                 " instances", errors.ECODE_STATE)
6673

    
6674
    if self.op.hypervisor is None:
6675
      self.op.hypervisor = self.cfg.GetHypervisorType()
6676

    
6677
    cluster = self.cfg.GetClusterInfo()
6678
    enabled_hvs = cluster.enabled_hypervisors
6679
    if self.op.hypervisor not in enabled_hvs:
6680
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6681
                                 " cluster (%s)" % (self.op.hypervisor,
6682
                                  ",".join(enabled_hvs)),
6683
                                 errors.ECODE_STATE)
6684

    
6685
    # check hypervisor parameter syntax (locally)
6686
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6687
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6688
                                      self.op.hvparams)
6689
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6690
    hv_type.CheckParameterSyntax(filled_hvp)
6691
    self.hv_full = filled_hvp
6692
    # check that we don't specify global parameters on an instance
6693
    _CheckGlobalHvParams(self.op.hvparams)
6694

    
6695
    # fill and remember the beparams dict
6696
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6697
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
6698

    
6699
    # build os parameters
6700
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6701

    
6702
    # now that hvp/bep are in final format, let's reset to defaults,
6703
    # if told to do so
6704
    if self.op.identify_defaults:
6705
      self._RevertToDefaults(cluster)
6706

    
6707
    # NIC buildup
6708
    self.nics = []
6709
    for idx, nic in enumerate(self.op.nics):
6710
      nic_mode_req = nic.get("mode", None)
6711
      nic_mode = nic_mode_req
6712
      if nic_mode is None:
6713
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6714

    
6715
      # in routed mode, for the first nic, the default ip is 'auto'
6716
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6717
        default_ip_mode = constants.VALUE_AUTO
6718
      else:
6719
        default_ip_mode = constants.VALUE_NONE
6720

    
6721
      # ip validity checks
6722
      ip = nic.get("ip", default_ip_mode)
6723
      if ip is None or ip.lower() == constants.VALUE_NONE:
6724
        nic_ip = None
6725
      elif ip.lower() == constants.VALUE_AUTO:
6726
        if not self.op.name_check:
6727
          raise errors.OpPrereqError("IP address set to auto but name checks"
6728
                                     " have been skipped. Aborting.",
6729
                                     errors.ECODE_INVAL)
6730
        nic_ip = self.hostname1.ip
6731
      else:
6732
        if not utils.IsValidIP(ip):
6733
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6734
                                     " like a valid IP" % ip,
6735
                                     errors.ECODE_INVAL)
6736
        nic_ip = ip
6737

    
6738
      # TODO: check the ip address for uniqueness
6739
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6740
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
6741
                                   errors.ECODE_INVAL)
6742

    
6743
      # MAC address verification
6744
      mac = nic.get("mac", constants.VALUE_AUTO)
6745
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6746
        mac = utils.NormalizeAndValidateMac(mac)
6747

    
6748
        try:
6749
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
6750
        except errors.ReservationError:
6751
          raise errors.OpPrereqError("MAC address %s already in use"
6752
                                     " in cluster" % mac,
6753
                                     errors.ECODE_NOTUNIQUE)
6754

    
6755
      # bridge verification
6756
      bridge = nic.get("bridge", None)
6757
      link = nic.get("link", None)
6758
      if bridge and link:
6759
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6760
                                   " at the same time", errors.ECODE_INVAL)
6761
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6762
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6763
                                   errors.ECODE_INVAL)
6764
      elif bridge:
6765
        link = bridge
6766

    
6767
      nicparams = {}
6768
      if nic_mode_req:
6769
        nicparams[constants.NIC_MODE] = nic_mode_req
6770
      if link:
6771
        nicparams[constants.NIC_LINK] = link
6772

    
6773
      check_params = cluster.SimpleFillNIC(nicparams)
6774
      objects.NIC.CheckParameterSyntax(check_params)
6775
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6776

    
6777
    # disk checks/pre-build
6778
    self.disks = []
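    # each disk specification is normalized into a plain dict of the form
    # {"size": <int, in MiB>, "mode": <one of constants.DISK_ACCESS_SET>},
    # plus an optional "adopt" key naming an existing logical volume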
6779
    for disk in self.op.disks:
6780
      mode = disk.get("mode", constants.DISK_RDWR)
6781
      if mode not in constants.DISK_ACCESS_SET:
6782
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6783
                                   mode, errors.ECODE_INVAL)
6784
      size = disk.get("size", None)
6785
      if size is None:
6786
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6787
      try:
6788
        size = int(size)
6789
      except (TypeError, ValueError):
6790
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6791
                                   errors.ECODE_INVAL)
6792
      new_disk = {"size": size, "mode": mode}
6793
      if "adopt" in disk:
6794
        new_disk["adopt"] = disk["adopt"]
6795
      self.disks.append(new_disk)
6796

    
6797
    if self.op.mode == constants.INSTANCE_IMPORT:
6798

    
6799
      # Check that the new instance doesn't have less disks than the export
6800
      instance_disks = len(self.disks)
6801
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6802
      if instance_disks < export_disks:
6803
        raise errors.OpPrereqError("Not enough disks to import."
6804
                                   " (instance: %d, export: %d)" %
6805
                                   (instance_disks, export_disks),
6806
                                   errors.ECODE_INVAL)
6807

    
6808
      disk_images = []
6809
      for idx in range(export_disks):
6810
        option = 'disk%d_dump' % idx
6811
        if export_info.has_option(constants.INISECT_INS, option):
6812
          # FIXME: are the old os-es, disk sizes, etc. useful?
6813
          export_name = export_info.get(constants.INISECT_INS, option)
6814
          image = utils.PathJoin(self.op.src_path, export_name)
6815
          disk_images.append(image)
6816
        else:
6817
          disk_images.append(False)
6818

    
6819
      self.src_images = disk_images
6820

    
6821
      old_name = export_info.get(constants.INISECT_INS, 'name')
6822
      try:
6823
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6824
      except (TypeError, ValueError), err:
6825
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
6826
                                   " an integer: %s" % str(err),
6827
                                   errors.ECODE_STATE)
6828
      if self.op.instance_name == old_name:
6829
        for idx, nic in enumerate(self.nics):
6830
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6831
            nic_mac_ini = 'nic%d_mac' % idx
6832
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6833

    
6834
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6835

    
6836
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6837
    if self.op.ip_check:
6838
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6839
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6840
                                   (self.check_ip, self.op.instance_name),
6841
                                   errors.ECODE_NOTUNIQUE)
6842

    
6843
    #### mac address generation
6844
    # By generating the mac address here, both the allocator and the hooks get
6845
    # the real final mac address rather than the 'auto' or 'generate' value.
6846
    # There is a race condition between the generation and the instance object
6847
    # creation, which means that we know the mac is valid now, but we're not
6848
    # sure it will be when we actually add the instance. If things go bad
6849
    # adding the instance will abort because of a duplicate mac, and the
6850
    # creation job will fail.
6851
    for nic in self.nics:
6852
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6853
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6854

    
6855
    #### allocator run
6856

    
6857
    if self.op.iallocator is not None:
6858
      self._RunAllocator()
6859

    
6860
    #### node related checks
6861

    
6862
    # check primary node
6863
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6864
    assert self.pnode is not None, \
6865
      "Cannot retrieve locked node %s" % self.op.pnode
6866
    if pnode.offline:
6867
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6868
                                 pnode.name, errors.ECODE_STATE)
6869
    if pnode.drained:
6870
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6871
                                 pnode.name, errors.ECODE_STATE)
6872

    
6873
    self.secondaries = []
6874

    
6875
    # mirror node verification
6876
    if self.op.disk_template in constants.DTS_NET_MIRROR:
6877
      if self.op.snode is None:
6878
        raise errors.OpPrereqError("The networked disk templates need"
6879
                                   " a mirror node", errors.ECODE_INVAL)
6880
      if self.op.snode == pnode.name:
6881
        raise errors.OpPrereqError("The secondary node cannot be the"
6882
                                   " primary node.", errors.ECODE_INVAL)
6883
      _CheckNodeOnline(self, self.op.snode)
6884
      _CheckNodeNotDrained(self, self.op.snode)
6885
      self.secondaries.append(self.op.snode)
6886

    
6887
    nodenames = [pnode.name] + self.secondaries
6888

    
6889
    req_size = _ComputeDiskSize(self.op.disk_template,
6890
                                self.disks)
6891

    
6892
    # Check lv size requirements, if not adopting
6893
    if req_size is not None and not self.adopt_disks:
6894
      _CheckNodesFreeDisk(self, nodenames, req_size)
6895

    
6896
    if self.adopt_disks: # instead, we must check the adoption data
6897
      all_lvs = set([i["adopt"] for i in self.disks])
6898
      if len(all_lvs) != len(self.disks):
6899
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6900
                                   errors.ECODE_INVAL)
6901
      for lv_name in all_lvs:
6902
        try:
6903
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6904
        except errors.ReservationError:
6905
          raise errors.OpPrereqError("LV named %s used by another instance" %
6906
                                     lv_name, errors.ECODE_NOTUNIQUE)
6907

    
6908
      node_lvs = self.rpc.call_lv_list([pnode.name],
6909
                                       self.cfg.GetVGName())[pnode.name]
6910
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6911
      node_lvs = node_lvs.payload
6912
      delta = all_lvs.difference(node_lvs.keys())
6913
      if delta:
6914
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6915
                                   utils.CommaJoin(delta),
6916
                                   errors.ECODE_INVAL)
6917
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6918
      if online_lvs:
6919
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6920
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6921
                                   errors.ECODE_STATE)
6922
      # update the size of disk based on what is found
6923
      for dsk in self.disks:
6924
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6925

    
6926
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6927

    
6928
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6929
    # check OS parameters (remotely)
6930
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6931

    
6932
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6933

    
6934
    # memory check on primary node
6935
    if self.op.start:
6936
      _CheckNodeFreeMemory(self, self.pnode.name,
6937
                           "creating instance %s" % self.op.instance_name,
6938
                           self.be_full[constants.BE_MEMORY],
6939
                           self.op.hypervisor)
6940

    
6941
    self.dry_run_result = list(nodenames)
6942

    
6943
  def Exec(self, feedback_fn):
6944
    """Create and add the instance to the cluster.
6945

6946
    """
6947
    instance = self.op.instance_name
6948
    pnode_name = self.pnode.name
6949

    
6950
    ht_kind = self.op.hypervisor
6951
    if ht_kind in constants.HTS_REQ_PORT:
6952
      network_port = self.cfg.AllocatePort()
6953
    else:
6954
      network_port = None
6955

    
6956
    if constants.ENABLE_FILE_STORAGE:
6957
      # this is needed because os.path.join does not accept None arguments
6958
      if self.op.file_storage_dir is None:
6959
        string_file_storage_dir = ""
6960
      else:
6961
        string_file_storage_dir = self.op.file_storage_dir
6962

    
6963
      # build the full file storage dir path
6964
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6965
                                        string_file_storage_dir, instance)
6966
    else:
6967
      file_storage_dir = ""
6968

    
6969
    disks = _GenerateDiskTemplate(self,
6970
                                  self.op.disk_template,
6971
                                  instance, pnode_name,
6972
                                  self.secondaries,
6973
                                  self.disks,
6974
                                  file_storage_dir,
6975
                                  self.op.file_driver,
6976
                                  0)
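    # build the config object for the new instance; it starts out with
    # admin_up=False and is only marked up (and started) at the end of Exec
    # if self.op.start is set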
6977

    
6978
    iobj = objects.Instance(name=instance, os=self.op.os_type,
6979
                            primary_node=pnode_name,
6980
                            nics=self.nics, disks=disks,
6981
                            disk_template=self.op.disk_template,
6982
                            admin_up=False,
6983
                            network_port=network_port,
6984
                            beparams=self.op.beparams,
6985
                            hvparams=self.op.hvparams,
6986
                            hypervisor=self.op.hypervisor,
6987
                            osparams=self.op.osparams,
6988
                            )
6989

    
6990
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
7002
    else:
7003
      feedback_fn("* creating instance disks...")
7004
      try:
7005
        _CreateDisks(self, iobj)
7006
      except errors.OpExecError:
7007
        self.LogWarning("Device creation failed, reverting...")
7008
        try:
7009
          _RemoveDisks(self, iobj)
7010
        finally:
7011
          self.cfg.ReleaseDRBDMinors(instance)
7012
          raise
7013

    
7014
    feedback_fn("adding instance %s to cluster config" % instance)
7015

    
7016
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7017

    
7018
    # Declare that we don't want to remove the instance lock anymore, as we've
7019
    # added the instance to the config
7020
    del self.remove_locks[locking.LEVEL_INSTANCE]
7021
    # Unlock all the nodes
7022
    if self.op.mode == constants.INSTANCE_IMPORT:
7023
      nodes_keep = [self.op.src_node]
7024
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7025
                       if node != self.op.src_node]
7026
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7027
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7028
    else:
7029
      self.context.glm.release(locking.LEVEL_NODE)
7030
      del self.acquired_locks[locking.LEVEL_NODE]
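    # from here on the instance exists in the configuration; the remaining
    # work is waiting for disk sync, running the OS scripts and optionally
    # starting the instance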
7031

    
7032
    if self.op.wait_for_sync:
7033
      disk_abort = not _WaitForSync(self, iobj)
7034
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7035
      # make sure the disks are not degraded (still sync-ing is ok)
7036
      time.sleep(15)
7037
      feedback_fn("* checking mirrors status")
7038
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7039
    else:
7040
      disk_abort = False
7041

    
7042
    if disk_abort:
7043
      _RemoveDisks(self, iobj)
7044
      self.cfg.RemoveInstance(iobj.name)
7045
      # Make sure the instance lock gets removed
7046
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7047
      raise errors.OpExecError("There are some degraded disks for"
7048
                               " this instance")
7049

    
7050
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7051
      if self.op.mode == constants.INSTANCE_CREATE:
7052
        if not self.op.no_install:
7053
          feedback_fn("* running the instance OS create scripts...")
7054
          # FIXME: pass debug option from opcode to backend
7055
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7056
                                                 self.op.debug_level)
7057
          result.Raise("Could not add os for instance %s"
7058
                       " on node %s" % (instance, pnode_name))
7059

    
7060
      elif self.op.mode == constants.INSTANCE_IMPORT:
7061
        feedback_fn("* running the instance OS import scripts...")
7062

    
7063
        transfers = []
7064

    
7065
        for idx, image in enumerate(self.src_images):
7066
          if not image:
7067
            continue
7068

    
7069
          # FIXME: pass debug option from opcode to backend
7070
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7071
                                             constants.IEIO_FILE, (image, ),
7072
                                             constants.IEIO_SCRIPT,
7073
                                             (iobj.disks[idx], idx),
7074
                                             None)
7075
          transfers.append(dt)
7076

    
7077
        import_result = \
7078
          masterd.instance.TransferInstanceData(self, feedback_fn,
7079
                                                self.op.src_node, pnode_name,
7080
                                                self.pnode.secondary_ip,
7081
                                                iobj, transfers)
7082
        if not compat.all(import_result):
7083
          self.LogWarning("Some disks for instance %s on node %s were not"
7084
                          " imported successfully" % (instance, pnode_name))
7085

    
7086
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7087
        feedback_fn("* preparing remote import...")
7088
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
7089
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7090

    
7091
        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7092
                                                     self.source_x509_ca,
7093
                                                     self._cds, timeouts)
7094
        if not compat.all(disk_results):
7095
          # TODO: Should the instance still be started, even if some disks
7096
          # failed to import (valid for local imports, too)?
7097
          self.LogWarning("Some disks for instance %s on node %s were not"
7098
                          " imported successfully" % (instance, pnode_name))
7099

    
7100
        # Run rename script on newly imported instance
7101
        assert iobj.name == instance
7102
        feedback_fn("Running rename script for %s" % instance)
7103
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7104
                                                   self.source_instance_name,
7105
                                                   self.op.debug_level)
7106
        if result.fail_msg:
7107
          self.LogWarning("Failed to run rename script for %s on node"
7108
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7109

    
7110
      else:
7111
        # also checked in the prereq part
7112
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7113
                                     % self.op.mode)
7114

    
7115
    if self.op.start:
7116
      iobj.admin_up = True
7117
      self.cfg.Update(iobj, feedback_fn)
7118
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7119
      feedback_fn("* starting instance...")
7120
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7121
      result.Raise("Could not start instance")
7122

    
7123
    return list(iobj.all_nodes)
7124

    
7125

    
7126
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False
7136

    
7137
  def ExpandNames(self):
7138
    self._ExpandAndLockInstance()
7139

    
7140
  def CheckPrereq(self):
7141
    """Check prerequisites.
7142

7143
    This checks that the instance is in the cluster.
7144

7145
    """
7146
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7147
    assert self.instance is not None, \
7148
      "Cannot retrieve locked instance %s" % self.op.instance_name
7149
    _CheckNodeOnline(self, self.instance.primary_node)
7150

    
7151
  def Exec(self, feedback_fn):
7152
    """Connect to the console of an instance
7153

7154
    """
7155
    instance = self.instance
7156
    node = instance.primary_node
7157

    
7158
    node_insts = self.rpc.call_instance_list([node],
7159
                                             [instance.hypervisor])[node]
7160
    node_insts.Raise("Can't get node information from %s" % node)
7161

    
7162
    if instance.name not in node_insts.payload:
7163
      raise errors.OpExecError("Instance %s is not running." % instance.name)
7164

    
7165
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7166

    
7167
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7168
    cluster = self.cfg.GetClusterInfo()
7169
    # beparams and hvparams are passed separately, to avoid editing the
7170
    # instance and then saving the defaults in the instance itself.
7171
    hvparams = cluster.FillHV(instance)
7172
    beparams = cluster.FillBE(instance)
7173
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
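    # the LU does not attach to the console itself; it only builds the ssh
    # command line below, which the client then runs interactively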
7174

    
7175
    # build ssh cmdline
7176
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7177

    
7178

    
7179
class LUReplaceDisks(LogicalUnit):
7180
  """Replace the disks of an instance.
7181

7182
  """
7183
  HPATH = "mirrors-replace"
7184
  HTYPE = constants.HTYPE_INSTANCE
7185
  _OP_REQP = ["instance_name", "mode", "disks"]
7186
  _OP_DEFS = [
7187
    ("remote_node", None),
7188
    ("iallocator", None),
7189
    ("early_release", None),
7190
    ]
7191
  REQ_BGL = False
7192

    
7193
  def CheckArguments(self):
7194
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7195
                                  self.op.iallocator)
7196

    
7197
  def ExpandNames(self):
7198
    self._ExpandAndLockInstance()
7199

    
7200
    if self.op.iallocator is not None:
7201
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7202

    
7203
    elif self.op.remote_node is not None:
7204
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7205
      self.op.remote_node = remote_node
7206

    
7207
      # Warning: do not remove the locking of the new secondary here
7208
      # unless DRBD8.AddChildren is changed to work in parallel;
7209
      # currently it doesn't since parallel invocations of
7210
      # FindUnusedMinor will conflict
7211
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7212
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7213

    
7214
    else:
7215
      self.needed_locks[locking.LEVEL_NODE] = []
7216
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7217

    
7218
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7219
                                   self.op.iallocator, self.op.remote_node,
7220
                                   self.op.disks, False, self.op.early_release)
7221

    
7222
    self.tasklets = [self.replacer]
7223

    
7224
  def DeclareLocks(self, level):
7225
    # If we're not already locking all nodes in the set we have to declare the
7226
    # instance's primary/secondary nodes.
7227
    if (level == locking.LEVEL_NODE and
7228
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7229
      self._LockInstancesNodes()
7230

    
7231
  def BuildHooksEnv(self):
7232
    """Build hooks env.
7233

7234
    This runs on the master, the primary and all the secondaries.
7235

7236
    """
7237
    instance = self.replacer.instance
7238
    env = {
7239
      "MODE": self.op.mode,
7240
      "NEW_SECONDARY": self.op.remote_node,
7241
      "OLD_SECONDARY": instance.secondary_nodes[0],
7242
      }
7243
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7244
    nl = [
7245
      self.cfg.GetMasterNode(),
7246
      instance.primary_node,
7247
      ]
7248
    if self.op.remote_node is not None:
7249
      nl.append(self.op.remote_node)
7250
    return env, nl, nl
7251

    
7252

    
7253
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
7261
    """Initializes this class.
7262

7263
    """
7264
    Tasklet.__init__(self, lu)
7265

    
7266
    # Parameters
7267
    self.instance_name = instance_name
7268
    self.mode = mode
7269
    self.iallocator_name = iallocator_name
7270
    self.remote_node = remote_node
7271
    self.disks = disks
7272
    self.delay_iallocator = delay_iallocator
7273
    self.early_release = early_release
7274

    
7275
    # Runtime data
7276
    self.instance = None
7277
    self.new_node = None
7278
    self.target_node = None
7279
    self.other_node = None
7280
    self.remote_node_info = None
7281
    self.node_secondary_ip = None
7282

    
7283
  @staticmethod
7284
  def CheckArguments(mode, remote_node, iallocator):
7285
    """Helper function for users of this class.
7286

7287
    """
7288
    # check for valid parameter combination
7289
    if mode == constants.REPLACE_DISK_CHG:
7290
      if remote_node is None and iallocator is None:
7291
        raise errors.OpPrereqError("When changing the secondary either an"
7292
                                   " iallocator script must be used or the"
7293
                                   " new node given", errors.ECODE_INVAL)
7294

    
7295
      if remote_node is not None and iallocator is not None:
7296
        raise errors.OpPrereqError("Give either the iallocator or the new"
7297
                                   " secondary, not both", errors.ECODE_INVAL)
7298

    
7299
    elif remote_node is not None or iallocator is not None:
7300
      # Not replacing the secondary
7301
      raise errors.OpPrereqError("The iallocator and new node options can"
7302
                                 " only be used when changing the"
7303
                                 " secondary node", errors.ECODE_INVAL)
7304

    
7305
  @staticmethod
7306
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7307
    """Compute a new secondary node using an IAllocator.
7308

7309
    """
7310
    ial = IAllocator(lu.cfg, lu.rpc,
7311
                     mode=constants.IALLOCATOR_MODE_RELOC,
7312
                     name=instance_name,
7313
                     relocate_from=relocate_from)
7314

    
7315
    ial.Run(iallocator_name)
7316

    
7317
    if not ial.success:
7318
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7319
                                 " %s" % (iallocator_name, ial.info),
7320
                                 errors.ECODE_NORES)
7321

    
7322
    if len(ial.result) != ial.required_nodes:
7323
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7324
                                 " of nodes (%s), required %s" %
7325
                                 (iallocator_name,
7326
                                  len(ial.result), ial.required_nodes),
7327
                                 errors.ECODE_FAULT)
7328

    
7329
    remote_node_name = ial.result[0]
7330

    
7331
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7332
               instance_name, remote_node_name)
7333

    
7334
    return remote_node_name
7335

    
7336
  def _FindFaultyDisks(self, node_name):
7337
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7338
                                    node_name, True)
7339

    
7340
  def CheckPrereq(self):
7341
    """Check prerequisites.
7342

7343
    This checks that the instance is in the cluster.
7344

7345
    """
7346
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7347
    assert instance is not None, \
7348
      "Cannot retrieve locked instance %s" % self.instance_name
7349

    
7350
    if instance.disk_template != constants.DT_DRBD8:
7351
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7352
                                 " instances", errors.ECODE_INVAL)
7353

    
7354
    if len(instance.secondary_nodes) != 1:
7355
      raise errors.OpPrereqError("The instance has a strange layout,"
7356
                                 " expected one secondary but found %d" %
7357
                                 len(instance.secondary_nodes),
7358
                                 errors.ECODE_FAULT)
7359

    
7360
    if not self.delay_iallocator:
7361
      self._CheckPrereq2()
7362

    
7363
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated
    out and is now called from Exec because during node evacuation the
    iallocator was only called with an unmodified cluster model, not
    taking planned changes into account.

    """
7372
    instance = self.instance
7373
    secondary_node = instance.secondary_nodes[0]
7374

    
7375
    if self.iallocator_name is None:
7376
      remote_node = self.remote_node
7377
    else:
7378
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7379
                                       instance.name, instance.secondary_nodes)
7380

    
7381
    if remote_node is not None:
7382
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7383
      assert self.remote_node_info is not None, \
7384
        "Cannot retrieve locked node %s" % remote_node
7385
    else:
7386
      self.remote_node_info = None
7387

    
7388
    if remote_node == self.instance.primary_node:
7389
      raise errors.OpPrereqError("The specified node is the primary node of"
7390
                                 " the instance.", errors.ECODE_INVAL)
7391

    
7392
    if remote_node == secondary_node:
7393
      raise errors.OpPrereqError("The specified node is already the"
7394
                                 " secondary node of the instance.",
7395
                                 errors.ECODE_INVAL)
7396

    
7397
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7398
                                    constants.REPLACE_DISK_CHG):
7399
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7400
                                 errors.ECODE_INVAL)
7401

    
7402
    if self.mode == constants.REPLACE_DISK_AUTO:
7403
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7404
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7405

    
7406
      if faulty_primary and faulty_secondary:
7407
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7408
                                   " one node and can not be repaired"
7409
                                   " automatically" % self.instance_name,
7410
                                   errors.ECODE_STATE)
7411

    
7412
      if faulty_primary:
7413
        self.disks = faulty_primary
7414
        self.target_node = instance.primary_node
7415
        self.other_node = secondary_node
7416
        check_nodes = [self.target_node, self.other_node]
7417
      elif faulty_secondary:
7418
        self.disks = faulty_secondary
7419
        self.target_node = secondary_node
7420
        self.other_node = instance.primary_node
7421
        check_nodes = [self.target_node, self.other_node]
7422
      else:
7423
        self.disks = []
7424
        check_nodes = []
7425

    
7426
    else:
7427
      # Non-automatic modes
7428
      if self.mode == constants.REPLACE_DISK_PRI:
7429
        self.target_node = instance.primary_node
7430
        self.other_node = secondary_node
7431
        check_nodes = [self.target_node, self.other_node]
7432

    
7433
      elif self.mode == constants.REPLACE_DISK_SEC:
7434
        self.target_node = secondary_node
7435
        self.other_node = instance.primary_node
7436
        check_nodes = [self.target_node, self.other_node]
7437

    
7438
      elif self.mode == constants.REPLACE_DISK_CHG:
7439
        self.new_node = remote_node
7440
        self.other_node = instance.primary_node
7441
        self.target_node = secondary_node
7442
        check_nodes = [self.new_node, self.other_node]
7443

    
7444
        _CheckNodeNotDrained(self.lu, remote_node)
7445

    
7446
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7447
        assert old_node_info is not None
7448
        if old_node_info.offline and not self.early_release:
7449
          # doesn't make sense to delay the release
7450
          self.early_release = True
7451
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7452
                          " early-release mode", secondary_node)
7453

    
7454
      else:
7455
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7456
                                     self.mode)
7457

    
7458
      # If not specified all disks should be replaced
7459
      if not self.disks:
7460
        self.disks = range(len(self.instance.disks))
7461

    
7462
    for node in check_nodes:
7463
      _CheckNodeOnline(self.lu, node)
7464

    
7465
    # Check whether disks are valid
7466
    for disk_idx in self.disks:
7467
      instance.FindDisk(disk_idx)
7468

    
7469
    # Get secondary node IP addresses
7470
    node_2nd_ip = {}
7471

    
7472
    for node_name in [self.target_node, self.other_node, self.new_node]:
7473
      if node_name is not None:
7474
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7475

    
7476
    self.node_secondary_ip = node_2nd_ip
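    # the per-node secondary IPs are needed later by the DRBD network
    # disconnect/attach RPCs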
7477

    
7478
  def Exec(self, feedback_fn):
7479
    """Execute disk replacement.
7480

7481
    This dispatches the disk replacement to the appropriate handler.
7482

7483
    """
7484
    if self.delay_iallocator:
7485
      self._CheckPrereq2()
7486

    
7487
    if not self.disks:
7488
      feedback_fn("No disks need replacement")
7489
      return
7490

    
7491
    feedback_fn("Replacing disk(s) %s for %s" %
7492
                (utils.CommaJoin(self.disks), self.instance.name))
7493

    
7494
    activate_disks = (not self.instance.admin_up)
7495

    
7496
    # Activate the instance disks if we're replacing them on a down instance
7497
    if activate_disks:
7498
      _StartInstanceDisks(self.lu, self.instance, True)
7499

    
7500
    try:
7501
      # Should we replace the secondary node?
7502
      if self.new_node is not None:
7503
        fn = self._ExecDrbd8Secondary
7504
      else:
7505
        fn = self._ExecDrbd8DiskOnly
7506

    
7507
      return fn(feedback_fn)
7508

    
7509
    finally:
7510
      # Deactivate the instance disks if we're replacing them on a
7511
      # down instance
7512
      if activate_disks:
7513
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7514

    
7515
  def _CheckVolumeGroup(self, nodes):
7516
    self.lu.LogInfo("Checking volume groups")
7517

    
7518
    vgname = self.cfg.GetVGName()
7519

    
7520
    # Make sure volume group exists on all involved nodes
7521
    results = self.rpc.call_vg_list(nodes)
7522
    if not results:
7523
      raise errors.OpExecError("Can't list volume groups on the nodes")
7524

    
7525
    for node in nodes:
7526
      res = results[node]
7527
      res.Raise("Error checking node %s" % node)
7528
      if vgname not in res.payload:
7529
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7530
                                 (vgname, node))
7531

    
7532
  def _CheckDisksExistence(self, nodes):
7533
    # Check disk existence
7534
    for idx, dev in enumerate(self.instance.disks):
7535
      if idx not in self.disks:
7536
        continue
7537

    
7538
      for node in nodes:
7539
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7540
        self.cfg.SetDiskID(dev, node)
7541

    
7542
        result = self.rpc.call_blockdev_find(node, dev)
7543

    
7544
        msg = result.fail_msg
7545
        if msg or not result.payload:
7546
          if not msg:
7547
            msg = "disk not found"
7548
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7549
                                   (idx, node, msg))
7550

    
7551
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7552
    for idx, dev in enumerate(self.instance.disks):
7553
      if idx not in self.disks:
7554
        continue
7555

    
7556
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7557
                      (idx, node_name))
7558

    
7559
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7560
                                   ldisk=ldisk):
7561
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7562
                                 " replace disks for instance %s" %
7563
                                 (node_name, self.instance.name))
7564

    
7565
  def _CreateNewStorage(self, node_name):
7566
    vgname = self.cfg.GetVGName()
7567
    iv_names = {}
7568

    
7569
    for idx, dev in enumerate(self.instance.disks):
7570
      if idx not in self.disks:
7571
        continue
7572

    
7573
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7574

    
7575
      self.cfg.SetDiskID(dev, node_name)
7576

    
7577
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7578
      names = _GenerateUniqueNames(self.lu, lv_names)
7579

    
7580
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7581
                             logical_id=(vgname, names[0]))
7582
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7583
                             logical_id=(vgname, names[1]))
7584

    
7585
      new_lvs = [lv_data, lv_meta]
7586
      old_lvs = dev.children
7587
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
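      # remember old and new LVs per DRBD device so the later steps can swap
      # the children and remove the old volumes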
7588

    
7589
      # we pass force_create=True to force the LVM creation
7590
      for new_lv in new_lvs:
7591
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7592
                        _GetInstanceInfoText(self.instance), False)
7593

    
7594
    return iv_names
7595

    
7596
  def _CheckDevices(self, node_name, iv_names):
7597
    for name, (dev, _, _) in iv_names.iteritems():
7598
      self.cfg.SetDiskID(dev, node_name)
7599

    
7600
      result = self.rpc.call_blockdev_find(node_name, dev)
7601

    
7602
      msg = result.fail_msg
7603
      if msg or not result.payload:
7604
        if not msg:
7605
          msg = "disk not found"
7606
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
7607
                                 (name, msg))
7608

    
7609
      if result.payload.is_degraded:
7610
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
7611

    
7612
  def _RemoveOldStorage(self, node_name, iv_names):
7613
    for name, (_, old_lvs, _) in iv_names.iteritems():
7614
      self.lu.LogInfo("Remove logical volumes for %s" % name)
7615

    
7616
      for lv in old_lvs:
7617
        self.cfg.SetDiskID(lv, node_name)
7618

    
7619
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7620
        if msg:
7621
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
7622
                             hint="remove unused LVs manually")
7623

    
7624
  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for the given node (or list of nodes)."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7627

    
7628
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
7651

    
7652
    # Step: check device activation
7653
    self.lu.LogStep(1, steps_total, "Check device existence")
7654
    self._CheckDisksExistence([self.other_node, self.target_node])
7655
    self._CheckVolumeGroup([self.target_node, self.other_node])
7656

    
7657
    # Step: check other node consistency
7658
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7659
    self._CheckDisksConsistency(self.other_node,
7660
                                self.other_node == self.instance.primary_node,
7661
                                False)
7662

    
7663
    # Step: create new storage
7664
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7665
    iv_names = self._CreateNewStorage(self.target_node)
7666

    
7667
    # Step: for each lv, detach+rename*2+attach
7668
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7669
    for dev, old_lvs, new_lvs in iv_names.itervalues():
7670
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7671

    
7672
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7673
                                                     old_lvs)
7674
      result.Raise("Can't detach drbd from local storage on node"
7675
                   " %s for device %s" % (self.target_node, dev.iv_name))
7676
      #dev.children = []
7677
      #cfg.Update(instance)
7678

    
7679
      # ok, we created the new LVs, so now we know we have the needed
7680
      # storage; as such, we proceed on the target node to rename
7681
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7682
      # using the assumption that logical_id == physical_id (which in
7683
      # turn is the unique_id on that node)
7684

    
7685
      # FIXME(iustin): use a better name for the replaced LVs
7686
      temp_suffix = int(time.time())
7687
      ren_fn = lambda d, suff: (d.physical_id[0],
7688
                                d.physical_id[1] + "_replaced-%s" % suff)
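      # ren_fn keeps the volume group (physical_id[0]) and appends a
      # "_replaced-<timestamp>" suffix to the LV name (physical_id[1])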
7689

    
7690
      # Build the rename list based on what LVs exist on the node
7691
      rename_old_to_new = []
7692
      for to_ren in old_lvs:
7693
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7694
        if not result.fail_msg and result.payload:
7695
          # device exists
7696
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7697

    
7698
      self.lu.LogInfo("Renaming the old LVs on the target node")
7699
      result = self.rpc.call_blockdev_rename(self.target_node,
7700
                                             rename_old_to_new)
7701
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
7702

    
7703
      # Now we rename the new LVs to the old LVs
7704
      self.lu.LogInfo("Renaming the new LVs on the target node")
7705
      rename_new_to_old = [(new, old.physical_id)
7706
                           for old, new in zip(old_lvs, new_lvs)]
7707
      result = self.rpc.call_blockdev_rename(self.target_node,
7708
                                             rename_new_to_old)
7709
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
7710

    
7711
      for old, new in zip(old_lvs, new_lvs):
7712
        new.logical_id = old.logical_id
7713
        self.cfg.SetDiskID(new, self.target_node)
7714

    
7715
      for disk in old_lvs:
7716
        disk.logical_id = ren_fn(disk, temp_suffix)
7717
        self.cfg.SetDiskID(disk, self.target_node)
7718

    
7719
      # Now that the new lvs have the old name, we can add them to the device
7720
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7721
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7722
                                                  new_lvs)
7723
      msg = result.fail_msg
7724
      if msg:
7725
        for new_lv in new_lvs:
7726
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
7727
                                               new_lv).fail_msg
7728
          if msg2:
7729
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7730
                               hint=("cleanup manually the unused logical"
7731
                                     "volumes"))
7732
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7733

    
7734
      dev.children = new_lvs
7735

    
7736
      self.cfg.Update(self.instance, feedback_fn)
7737

    
7738
    cstep = 5
7739
    if self.early_release:
7740
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7741
      cstep += 1
7742
      self._RemoveOldStorage(self.target_node, iv_names)
7743
      # WARNING: we release both node locks here, do not do other RPCs
7744
      # than WaitForSync to the primary node
7745
      self._ReleaseNodeLock([self.target_node, self.other_node])
7746

    
7747
    # Wait for sync
7748
    # This can fail as the old devices are degraded and _WaitForSync
7749
    # does a combined result over all disks, so we don't check its return value
7750
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7751
    cstep += 1
7752
    _WaitForSync(self.lu, self.instance)
7753

    
7754
    # Check all devices manually
7755
    self._CheckDevices(self.instance.primary_node, iv_names)
7756

    
7757
    # Step: remove old storage
7758
    if not self.early_release:
7759
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7760
      cstep += 1
7761
      self._RemoveOldStorage(self.target_node, iv_names)
7762

    
7763
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
7783

    
7784
    # Step: check device activation
7785
    self.lu.LogStep(1, steps_total, "Check device existence")
7786
    self._CheckDisksExistence([self.instance.primary_node])
7787
    self._CheckVolumeGroup([self.instance.primary_node])
7788

    
7789
    # Step: check other node consistency
7790
    self.lu.LogStep(2, steps_total, "Check peer consistency")
7791
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
7792

    
7793
    # Step: create new storage
7794
    self.lu.LogStep(3, steps_total, "Allocate new storage")
7795
    for idx, dev in enumerate(self.instance.disks):
7796
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7797
                      (self.new_node, idx))
7798
      # we pass force_create=True to force LVM creation
7799
      for new_lv in dev.children:
7800
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7801
                        _GetInstanceInfoText(self.instance), False)
7802

    
7803
    # Step 4: drbd minors and drbd setup changes
7804
    # after this, we must manually remove the drbd minors on both the
7805
    # error and the success paths
7806
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7807
    minors = self.cfg.AllocateDRBDMinor([self.new_node
7808
                                         for dev in self.instance.disks],
7809
                                        self.instance.name)
7810
    logging.debug("Allocated minors %r", minors)
7811

    
7812
    iv_names = {}
7813
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7814
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7815
                      (self.new_node, idx))
7816
      # create new devices on new_node; note that we create two IDs:
7817
      # one without port, so the drbd will be activated without
7818
      # networking information on the new node at this stage, and one
7819
      # with network, for the latter activation in step 4
7820
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7821
      if self.instance.primary_node == o_node1:
7822
        p_minor = o_minor1
7823
      else:
7824
        assert self.instance.primary_node == o_node2, "Three-node instance?"
7825
        p_minor = o_minor2
7826

    
7827
      new_alone_id = (self.instance.primary_node, self.new_node, None,
7828
                      p_minor, new_minor, o_secret)
7829
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
7830
                    p_minor, new_minor, o_secret)
7831

    
7832
      iv_names[idx] = (dev, dev.children, new_net_id)
7833
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7834
                    new_net_id)
7835
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7836
                              logical_id=new_alone_id,
7837
                              children=dev.children,
7838
                              size=dev.size)
7839
      try:
7840
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7841
                              _GetInstanceInfoText(self.instance), False)
7842
      except errors.GenericError:
7843
        self.cfg.ReleaseDRBDMinors(self.instance.name)
7844
        raise
7845

    
7846
    # We have new devices, shutdown the drbd on the old secondary
7847
    for idx, dev in enumerate(self.instance.disks):
7848
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7849
      self.cfg.SetDiskID(dev, self.target_node)
7850
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7851
      if msg:
7852
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7853
                           "node: %s" % (idx, msg),
7854
                           hint=("Please cleanup this device manually as"
7855
                                 " soon as possible"))
7856

    
7857
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7858
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7859
                                               self.node_secondary_ip,
7860
                                               self.instance.disks)\
7861
                                              [self.instance.primary_node]
7862

    
7863
    msg = result.fail_msg
7864
    if msg:
7865
      # detaches didn't succeed (unlikely)
7866
      self.cfg.ReleaseDRBDMinors(self.instance.name)
7867
      raise errors.OpExecError("Can't detach the disks from the network on"
7868
                               " old node: %s" % (msg,))
7869

    
7870
    # if we managed to detach at least one, we update all the disks of
7871
    # the instance to point to the new secondary
7872
    self.lu.LogInfo("Updating instance configuration")
7873
    for dev, _, new_logical_id in iv_names.itervalues():
7874
      dev.logical_id = new_logical_id
7875
      self.cfg.SetDiskID(dev, self.instance.primary_node)
7876

    
7877
    self.cfg.Update(self.instance, feedback_fn)
7878

    
7879
    # and now perform the drbd attach
7880
    self.lu.LogInfo("Attaching primary drbds to new secondary"
7881
                    " (standalone => connected)")
7882
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7883
                                            self.new_node],
7884
                                           self.node_secondary_ip,
7885
                                           self.instance.disks,
7886
                                           self.instance.name,
7887
                                           False)
7888
    for to_node, to_result in result.items():
7889
      msg = to_result.fail_msg
7890
      if msg:
7891
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7892
                           to_node, msg,
7893
                           hint=("please do a gnt-instance info to see the"
7894
                                 " status of disks"))
7895
    cstep = 5
7896
    if self.early_release:
7897
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7898
      cstep += 1
7899
      self._RemoveOldStorage(self.target_node, iv_names)
7900
      # WARNING: we release all node locks here, do not do other RPCs
7901
      # than WaitForSync to the primary node
7902
      self._ReleaseNodeLock([self.instance.primary_node,
7903
                             self.target_node,
7904
                             self.new_node])
7905

    
7906
    # Wait for sync
7907
    # This can fail as the old devices are degraded and _WaitForSync
7908
    # does a combined result over all disks, so we don't check its return value
7909
    self.lu.LogStep(cstep, steps_total, "Sync devices")
7910
    cstep += 1
7911
    _WaitForSync(self.lu, self.instance)
7912

    
7913
    # Check all devices manually
7914
    self._CheckDevices(self.instance.primary_node, iv_names)
7915

    
7916
    # Step: remove old storage
7917
    if not self.early_release:
7918
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
7919
      self._RemoveOldStorage(self.target_node, iv_names)
7920

    
7921

    
7922
class LURepairNodeStorage(NoHooksLU):
7923
  """Repairs the volume group on a node.
7924

7925
  """
7926
  _OP_REQP = ["node_name"]
7927
  REQ_BGL = False
7928

    
7929
  def CheckArguments(self):
7930
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7931

    
7932
    _CheckStorageType(self.op.storage_type)
7933

    
7934
  def ExpandNames(self):
7935
    self.needed_locks = {
7936
      locking.LEVEL_NODE: [self.op.node_name],
7937
      }
7938

    
7939
  def _CheckFaultyDisks(self, instance, node_name):
7940
    """Ensure faulty disks abort the opcode or at least warn."""
7941
    try:
7942
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7943
                                  node_name, True):
7944
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7945
                                   " node '%s'" % (instance.name, node_name),
7946
                                   errors.ECODE_STATE)
7947
    except errors.OpPrereqError, err:
7948
      if self.op.ignore_consistency:
7949
        self.proc.LogWarning(str(err.args[0]))
7950
      else:
7951
        raise
7952

    
7953
  def CheckPrereq(self):
7954
    """Check prerequisites.
7955

7956
    """
7957
    storage_type = self.op.storage_type
7958

    
7959
    if (constants.SO_FIX_CONSISTENCY not in
7960
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7961
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7962
                                 " repaired" % storage_type,
7963
                                 errors.ECODE_INVAL)
7964

    
7965
    # Check whether any instance on this node has faulty disks
7966
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7967
      if not inst.admin_up:
7968
        continue
7969
      check_nodes = set(inst.all_nodes)
7970
      check_nodes.discard(self.op.node_name)
7971
      for inst_node_name in check_nodes:
7972
        self._CheckFaultyDisks(inst, inst_node_name)
7973

    
7974
  def Exec(self, feedback_fn):
7975
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7976
                (self.op.name, self.op.node_name))
7977

    
7978
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7979
    result = self.rpc.call_storage_execute(self.op.node_name,
7980
                                           self.op.storage_type, st_args,
7981
                                           self.op.name,
7982
                                           constants.SO_FIX_CONSISTENCY)
7983
    result.Raise("Failed to repair storage unit '%s' on %s" %
7984
                 (self.op.name, self.op.node_name))
7985

    
7986

    
7987
class LUNodeEvacuationStrategy(NoHooksLU):
7988
  """Computes the node evacuation strategy.
7989

7990
  """
7991
  _OP_REQP = ["nodes"]
7992
  _OP_DEFS = [
7993
    ("remote_node", None),
7994
    ("iallocator", None),
7995
    ]
7996
  REQ_BGL = False
7997

    
7998
  def CheckArguments(self):
7999
    if self.op.remote_node is not None and self.op.iallocator is not None:
8000
      raise errors.OpPrereqError("Give either the iallocator or the new"
8001
                                 " secondary, not both", errors.ECODE_INVAL)
8002

    
8003
  def ExpandNames(self):
8004
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8005
    self.needed_locks = locks = {}
8006
    if self.op.remote_node is None:
8007
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8008
    else:
8009
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8010
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8011

    
8012
  def Exec(self, feedback_fn):
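    # two strategies: with an explicit remote node, all secondary instances
    # of the evacuated nodes are simply assigned to it; otherwise the
    # iallocator computes the new placements in multi-evacuate mode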
8013
    if self.op.remote_node is not None:
8014
      instances = []
8015
      for node in self.op.nodes:
8016
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8017
      result = []
8018
      for i in instances:
8019
        if i.primary_node == self.op.remote_node:
8020
          raise errors.OpPrereqError("Node %s is the primary node of"
8021
                                     " instance %s, cannot use it as"
8022
                                     " secondary" %
8023
                                     (self.op.remote_node, i.name),
8024
                                     errors.ECODE_INVAL)
8025
        result.append([i.name, self.op.remote_node])
8026
    else:
8027
      ial = IAllocator(self.cfg, self.rpc,
8028
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8029
                       evac_nodes=self.op.nodes)
8030
      ial.Run(self.op.iallocator, validate=True)
8031
      if not ial.success:
8032
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8033
                                 errors.ECODE_NORES)
8034
      result = ial.result
8035
    return result
8036

    
8037

    
8038
class LUGrowDisk(LogicalUnit):
8039
  """Grow a disk of an instance.
8040

8041
  """
8042
  HPATH = "disk-grow"
8043
  HTYPE = constants.HTYPE_INSTANCE
8044
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8045
  REQ_BGL = False
8046

    
8047
  def ExpandNames(self):
8048
    self._ExpandAndLockInstance()
8049
    self.needed_locks[locking.LEVEL_NODE] = []
8050
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8051

    
8052
  def DeclareLocks(self, level):
8053
    if level == locking.LEVEL_NODE:
8054
      self._LockInstancesNodes()
8055

    
8056
  def BuildHooksEnv(self):
8057
    """Build hooks env.
8058

8059
    This runs on the master, the primary and all the secondaries.
8060

8061
    """
8062
    env = {
8063
      "DISK": self.op.disk,
8064
      "AMOUNT": self.op.amount,
8065
      }
8066
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8067
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8068
    return env, nl, nl
8069

    
8070
  def CheckPrereq(self):
8071
    """Check prerequisites.
8072

8073
    This checks that the instance is in the cluster.
8074

8075
    """
8076
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8077
    assert instance is not None, \
8078
      "Cannot retrieve locked instance %s" % self.op.instance_name
8079
    nodenames = list(instance.all_nodes)
8080
    for node in nodenames:
8081
      _CheckNodeOnline(self, node)
8082

    
8083

    
8084
    self.instance = instance
8085

    
8086
    if instance.disk_template not in constants.DTS_GROWABLE:
8087
      raise errors.OpPrereqError("Instance's disk layout does not support"
8088
                                 " growing.", errors.ECODE_INVAL)
8089

    
8090
    self.disk = instance.FindDisk(self.op.disk)
8091

    
8092
    if instance.disk_template != constants.DT_FILE:
8093
      # TODO: check the free disk space for file, when that feature will be
8094
      # supported
8095
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8096

    
8097
  def Exec(self, feedback_fn):
8098
    """Execute disk grow.
8099

8100
    """
8101
    instance = self.instance
8102
    disk = self.disk
8103

    
8104
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
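    # the disk must be assembled (activated) before blockdev_grow can be
    # issued on each of the instance's nodes below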
8105
    if not disks_ok:
8106
      raise errors.OpExecError("Cannot activate block device to grow")
8107

    
8108
    for node in instance.all_nodes:
8109
      self.cfg.SetDiskID(disk, node)
8110
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8111
      result.Raise("Grow request failed to node %s" % node)
8112

    
8113
      # TODO: Rewrite code to work properly
8114
      # DRBD goes into sync mode for a short amount of time after executing the
8115
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8116
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8117
      # time is a work-around.
8118
      time.sleep(5)
8119

    
8120
    disk.RecordGrow(self.op.amount)
8121
    self.cfg.Update(instance, feedback_fn)
8122
    if self.op.wait_for_sync:
8123
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8124
      if disk_abort:
8125
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8126
                             " status.\nPlease check the instance.")
8127
      if not instance.admin_up:
8128
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8129
    elif not instance.admin_up:
8130
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8131
                           " not supposed to be running because no wait for"
8132
                           " sync mode was requested.")
8133

    
8134

    
8135
class LUQueryInstanceData(NoHooksLU):
8136
  """Query runtime instance data.
8137

8138
  """
8139
  _OP_REQP = ["instances", "static"]
8140
  REQ_BGL = False
8141

    
8142
  def CheckArguments(self):
8143
    if not isinstance(self.op.instances, list):
8144
      raise errors.OpPrereqError("Invalid argument type 'instances'",
8145
                                 errors.ECODE_INVAL)
8146

    
8147
  def ExpandNames(self):
8148
    self.needed_locks = {}
8149
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8150

    
8151
    if self.op.instances:
8152
      self.wanted_names = []
8153
      for name in self.op.instances:
8154
        full_name = _ExpandInstanceName(self.cfg, name)
8155
        self.wanted_names.append(full_name)
8156
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8157
    else:
8158
      self.wanted_names = None
8159
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8160

    
8161
    self.needed_locks[locking.LEVEL_NODE] = []
8162
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8163

    
8164
  def DeclareLocks(self, level):
8165
    if level == locking.LEVEL_NODE:
8166
      self._LockInstancesNodes()
8167

    
8168
  def CheckPrereq(self):
8169
    """Check prerequisites.
8170

8171
    This only checks the optional instance list against the existing names.
8172

8173
    """
8174
    if self.wanted_names is None:
8175
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8176

    
8177
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8178
                             in self.wanted_names]
8179

    
8180
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8181
    """Returns the status of a block device
8182

8183
    """
8184
    if self.op.static or not node:
8185
      return None
8186

    
8187
    self.cfg.SetDiskID(dev, node)
8188

    
8189
    result = self.rpc.call_blockdev_find(node, dev)
8190
    if result.offline:
8191
      return None
8192

    
8193
    result.Raise("Can't compute disk status for %s" % instance_name)
8194

    
8195
    status = result.payload
8196
    if status is None:
8197
      return None
8198

    
8199
    return (status.dev_path, status.major, status.minor,
8200
            status.sync_percent, status.estimated_time,
8201
            status.is_degraded, status.ldisk_status)
8202

    
8203
  def _ComputeDiskStatus(self, instance, snode, dev):
8204
    """Compute block device status.
8205

8206
    """
8207
    if dev.dev_type in constants.LDS_DRBD:
8208
      # we change the snode then (otherwise we use the one passed in)
8209
      if dev.logical_id[0] == instance.primary_node:
8210
        snode = dev.logical_id[1]
8211
      else:
8212
        snode = dev.logical_id[0]
8213

    
8214
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8215
                                              instance.name, dev)
8216
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8217

    
8218
    if dev.children:
8219
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8220
                      for child in dev.children]
8221
    else:
8222
      dev_children = []
8223

    
8224
    data = {
8225
      "iv_name": dev.iv_name,
8226
      "dev_type": dev.dev_type,
8227
      "logical_id": dev.logical_id,
8228
      "physical_id": dev.physical_id,
8229
      "pstatus": dev_pstatus,
8230
      "sstatus": dev_sstatus,
8231
      "children": dev_children,
8232
      "mode": dev.mode,
8233
      "size": dev.size,
8234
      }
8235

    
8236
    return data
8237

    
8238
  def Exec(self, feedback_fn):
8239
    """Gather and return data"""
8240
    result = {}
8241

    
8242
    cluster = self.cfg.GetClusterInfo()
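    # the cluster info is needed to compute the filled ("actual") hypervisor,
    # backend and OS parameters for each instance below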
8243

    
8244
    for instance in self.wanted_instances:
8245
      if not self.op.static:
8246
        remote_info = self.rpc.call_instance_info(instance.primary_node,
8247
                                                  instance.name,
8248
                                                  instance.hypervisor)
8249
        remote_info.Raise("Error checking node %s" % instance.primary_node)
8250
        remote_info = remote_info.payload
8251
        if remote_info and "state" in remote_info:
8252
          remote_state = "up"
8253
        else:
8254
          remote_state = "down"
8255
      else:
8256
        remote_state = None
8257
      if instance.admin_up:
8258
        config_state = "up"
8259
      else:
8260
        config_state = "down"
8261

    
8262
      disks = [self._ComputeDiskStatus(instance, None, device)
8263
               for device in instance.disks]
8264

    
8265
      idict = {
8266
        "name": instance.name,
8267
        "config_state": config_state,
8268
        "run_state": remote_state,
8269
        "pnode": instance.primary_node,
8270
        "snodes": instance.secondary_nodes,
8271
        "os": instance.os,
8272
        # this happens to be the same format used for hooks
8273
        "nics": _NICListToTuple(self, instance.nics),
8274
        "disk_template": instance.disk_template,
8275
        "disks": disks,
8276
        "hypervisor": instance.hypervisor,
8277
        "network_port": instance.network_port,
8278
        "hv_instance": instance.hvparams,
8279
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
8280
        "be_instance": instance.beparams,
8281
        "be_actual": cluster.FillBE(instance),
8282
        "os_instance": instance.osparams,
8283
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8284
        "serial_no": instance.serial_no,
8285
        "mtime": instance.mtime,
8286
        "ctime": instance.ctime,
8287
        "uuid": instance.uuid,
8288
        }
8289

    
8290
      result[instance.name] = idict
8291

    
8292
    return result
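
# Illustrative sketch (not part of the LU code above): _ComputeDiskStatus
# returns one dict per disk, with nested "children" entries for DRBD devices.
# The helper below is a hypothetical consumer of that structure and only
# assumes the keys shown in the dict built above.
def _ExampleFlattenDiskTree(disk, depth=0):
  """Yields (depth, iv_name, pstatus, sstatus) for a disk and its children."""
  yield (depth, disk["iv_name"], disk["pstatus"], disk["sstatus"])
  for child in disk["children"]:
    for item in _ExampleFlattenDiskTree(child, depth + 1):
      yield item
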
8293

    
8294

    
8295
class LUSetInstanceParams(LogicalUnit):
8296
  """Modifies an instances's parameters.
8297

8298
  """
8299
  HPATH = "instance-modify"
8300
  HTYPE = constants.HTYPE_INSTANCE
8301
  _OP_REQP = ["instance_name"]
8302
  _OP_DEFS = [
8303
    ("nics", _EmptyList),
8304
    ("disks", _EmptyList),
8305
    ("beparams", _EmptyDict),
8306
    ("hvparams", _EmptyDict),
8307
    ("disk_template", None),
8308
    ("remote_node", None),
8309
    ("os_name", None),
8310
    ("force_variant", False),
8311
    ("osparams", None),
8312
    ("force", False),
8313
    ]
8314
  REQ_BGL = False
8315

    
8316
  def CheckArguments(self):
8317
    if not (self.op.nics or self.op.disks or self.op.disk_template or
8318
            self.op.hvparams or self.op.beparams or self.op.os_name):
8319
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8320

    
8321
    if self.op.hvparams:
8322
      _CheckGlobalHvParams(self.op.hvparams)
8323

    
8324
    # Disk validation
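    # Illustrative note: self.op.disks is a list of (operation, parameters)
    # pairs, e.g.
    #   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]
    # to add one disk, or [(0, {"mode": ...})] to change the mode of disk 0;
    # the size above is a made-up sample value.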
8325
    disk_addremove = 0
8326
    for disk_op, disk_dict in self.op.disks:
8327
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8328
      if disk_op == constants.DDM_REMOVE:
8329
        disk_addremove += 1
8330
        continue
8331
      elif disk_op == constants.DDM_ADD:
8332
        disk_addremove += 1
8333
      else:
8334
        if not isinstance(disk_op, int):
8335
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8336
        if not isinstance(disk_dict, dict):
8337
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8338
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8339

    
8340
      if disk_op == constants.DDM_ADD:
8341
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8342
        if mode not in constants.DISK_ACCESS_SET:
8343
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8344
                                     errors.ECODE_INVAL)
8345
        size = disk_dict.get('size', None)
8346
        if size is None:
8347
          raise errors.OpPrereqError("Required disk parameter size missing",
8348
                                     errors.ECODE_INVAL)
8349
        try:
8350
          size = int(size)
8351
        except (TypeError, ValueError), err:
8352
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8353
                                     str(err), errors.ECODE_INVAL)
8354
        disk_dict['size'] = size
8355
      else:
8356
        # modification of disk
8357
        if 'size' in disk_dict:
8358
          raise errors.OpPrereqError("Disk size change not possible, use"
8359
                                     " grow-disk", errors.ECODE_INVAL)
8360

    
8361
    if disk_addremove > 1:
8362
      raise errors.OpPrereqError("Only one disk add or remove operation"
8363
                                 " supported at a time", errors.ECODE_INVAL)
8364

    
8365
    if self.op.disks and self.op.disk_template is not None:
8366
      raise errors.OpPrereqError("Disk template conversion and other disk"
8367
                                 " changes not supported at the same time",
8368
                                 errors.ECODE_INVAL)
8369

    
8370
    if self.op.disk_template:
8371
      _CheckDiskTemplate(self.op.disk_template)
8372
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
8373
          self.op.remote_node is None):
8374
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
8375
                                   " one requires specifying a secondary node",
8376
                                   errors.ECODE_INVAL)
8377

    
8378
    # NIC validation
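    # Illustrative note: self.op.nics has the same (operation, parameters)
    # shape as self.op.disks, e.g.
    #   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO, "link": "br0"})]
    # where "br0" is a placeholder link name.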
8379
    nic_addremove = 0
8380
    for nic_op, nic_dict in self.op.nics:
8381
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8382
      if nic_op == constants.DDM_REMOVE:
8383
        nic_addremove += 1
8384
        continue
8385
      elif nic_op == constants.DDM_ADD:
8386
        nic_addremove += 1
8387
      else:
8388
        if not isinstance(nic_op, int):
8389
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8390
        if not isinstance(nic_dict, dict):
8391
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8392
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8393

    
8394
      # nic_dict should be a dict
8395
      nic_ip = nic_dict.get('ip', None)
8396
      if nic_ip is not None:
8397
        if nic_ip.lower() == constants.VALUE_NONE:
8398
          nic_dict['ip'] = None
8399
        else:
8400
          if not utils.IsValidIP(nic_ip):
8401
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8402
                                       errors.ECODE_INVAL)
8403

    
8404
      nic_bridge = nic_dict.get('bridge', None)
8405
      nic_link = nic_dict.get('link', None)
8406
      if nic_bridge and nic_link:
8407
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8408
                                   " at the same time", errors.ECODE_INVAL)
8409
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8410
        nic_dict['bridge'] = None
8411
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8412
        nic_dict['link'] = None
8413

    
8414
      if nic_op == constants.DDM_ADD:
8415
        nic_mac = nic_dict.get('mac', None)
8416
        if nic_mac is None:
8417
          nic_dict['mac'] = constants.VALUE_AUTO
8418

    
8419
      if 'mac' in nic_dict:
8420
        nic_mac = nic_dict['mac']
8421
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8422
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8423

    
8424
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8425
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8426
                                     " modifying an existing nic",
8427
                                     errors.ECODE_INVAL)
8428

    
8429
    if nic_addremove > 1:
8430
      raise errors.OpPrereqError("Only one NIC add or remove operation"
8431
                                 " supported at a time", errors.ECODE_INVAL)
8432

    
8433
  def ExpandNames(self):
8434
    self._ExpandAndLockInstance()
8435
    self.needed_locks[locking.LEVEL_NODE] = []
8436
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8437

    
8438
  def DeclareLocks(self, level):
8439
    if level == locking.LEVEL_NODE:
8440
      self._LockInstancesNodes()
8441
      if self.op.disk_template and self.op.remote_node:
8442
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8443
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8444

    
8445
  def BuildHooksEnv(self):
8446
    """Build hooks env.
8447

8448
    This runs on the master, primary and secondaries.
8449

8450
    """
8451
    args = dict()
8452
    if constants.BE_MEMORY in self.be_new:
8453
      args['memory'] = self.be_new[constants.BE_MEMORY]
8454
    if constants.BE_VCPUS in self.be_new:
8455
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
8456
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8457
    # information at all.
8458
    if self.op.nics:
8459
      args['nics'] = []
8460
      nic_override = dict(self.op.nics)
8461
      for idx, nic in enumerate(self.instance.nics):
8462
        if idx in nic_override:
8463
          this_nic_override = nic_override[idx]
8464
        else:
8465
          this_nic_override = {}
8466
        if 'ip' in this_nic_override:
8467
          ip = this_nic_override['ip']
8468
        else:
8469
          ip = nic.ip
8470
        if 'mac' in this_nic_override:
8471
          mac = this_nic_override['mac']
8472
        else:
8473
          mac = nic.mac
8474
        if idx in self.nic_pnew:
8475
          nicparams = self.nic_pnew[idx]
8476
        else:
8477
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8478
        mode = nicparams[constants.NIC_MODE]
8479
        link = nicparams[constants.NIC_LINK]
8480
        args['nics'].append((ip, mac, mode, link))
8481
      if constants.DDM_ADD in nic_override:
8482
        ip = nic_override[constants.DDM_ADD].get('ip', None)
8483
        mac = nic_override[constants.DDM_ADD]['mac']
8484
        nicparams = self.nic_pnew[constants.DDM_ADD]
8485
        mode = nicparams[constants.NIC_MODE]
8486
        link = nicparams[constants.NIC_LINK]
8487
        args['nics'].append((ip, mac, mode, link))
8488
      elif constants.DDM_REMOVE in nic_override:
8489
        del args['nics'][-1]
8490

    
8491
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8492
    if self.op.disk_template:
8493
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8494
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8495
    return env, nl, nl
8496

    
8497
  def CheckPrereq(self):
8498
    """Check prerequisites.
8499

8500
    This checks the requested parameter changes against the instance
    configuration and the state of its nodes.
8501

8502
    """
8503
    # checking the new params on the primary/secondary nodes
8504

    
8505
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8506
    cluster = self.cluster = self.cfg.GetClusterInfo()
8507
    assert self.instance is not None, \
8508
      "Cannot retrieve locked instance %s" % self.op.instance_name
8509
    pnode = instance.primary_node
8510
    nodelist = list(instance.all_nodes)
8511

    
8512
    # OS change
8513
    if self.op.os_name and not self.op.force:
8514
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8515
                      self.op.force_variant)
8516
      instance_os = self.op.os_name
8517
    else:
8518
      instance_os = instance.os
8519

    
8520
    if self.op.disk_template:
8521
      if instance.disk_template == self.op.disk_template:
8522
        raise errors.OpPrereqError("Instance already has disk template %s" %
8523
                                   instance.disk_template, errors.ECODE_INVAL)
8524

    
8525
      if (instance.disk_template,
8526
          self.op.disk_template) not in self._DISK_CONVERSIONS:
8527
        raise errors.OpPrereqError("Unsupported disk template conversion from"
8528
                                   " %s to %s" % (instance.disk_template,
8529
                                                  self.op.disk_template),
8530
                                   errors.ECODE_INVAL)
8531
      if self.op.disk_template in constants.DTS_NET_MIRROR:
8532
        _CheckNodeOnline(self, self.op.remote_node)
8533
        _CheckNodeNotDrained(self, self.op.remote_node)
8534
        disks = [{"size": d.size} for d in instance.disks]
8535
        required = _ComputeDiskSize(self.op.disk_template, disks)
8536
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8537
        _CheckInstanceDown(self, instance, "cannot change disk template")
8538

    
8539
    # hvparams processing
8540
    if self.op.hvparams:
8541
      hv_type = instance.hypervisor
8542
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8543
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8544
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8545

    
8546
      # local check
8547
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8548
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8549
      self.hv_new = hv_new # the new actual values
8550
      self.hv_inst = i_hvdict # the new dict (without defaults)
8551
    else:
8552
      self.hv_new = self.hv_inst = {}
8553

    
8554
    # beparams processing
8555
    if self.op.beparams:
8556
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8557
                                   use_none=True)
8558
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8559
      be_new = cluster.SimpleFillBE(i_bedict)
8560
      self.be_new = be_new # the new actual values
8561
      self.be_inst = i_bedict # the new dict (without defaults)
8562
    else:
8563
      self.be_new = self.be_inst = {}
8564

    
8565
    # osparams processing
8566
    if self.op.osparams:
8567
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8568
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8569
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8570
      self.os_inst = i_osdict # the new dict (without defaults)
8571
    else:
8572
      self.os_new = self.os_inst = {}
8573

    
8574
    self.warn = []
8575

    
8576
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8577
      mem_check_list = [pnode]
8578
      if be_new[constants.BE_AUTO_BALANCE]:
8579
        # either we changed auto_balance to yes or it was from before
8580
        mem_check_list.extend(instance.secondary_nodes)
8581
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
8582
                                                  instance.hypervisor)
8583
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8584
                                         instance.hypervisor)
8585
      pninfo = nodeinfo[pnode]
8586
      msg = pninfo.fail_msg
8587
      if msg:
8588
        # Assume the primary node is unreachable and go ahead
8589
        self.warn.append("Can't get info from primary node %s: %s" %
8590
                         (pnode,  msg))
8591
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
8592
        self.warn.append("Node data from primary node %s doesn't contain"
8593
                         " free memory information" % pnode)
8594
      elif instance_info.fail_msg:
8595
        self.warn.append("Can't get instance runtime information: %s" %
8596
                        instance_info.fail_msg)
8597
      else:
8598
        if instance_info.payload:
8599
          current_mem = int(instance_info.payload['memory'])
8600
        else:
8601
          # Assume instance not running
8602
          # (there is a slight race condition here, but it's not very probable,
8603
          # and we have no other way to check)
8604
          current_mem = 0
8605
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8606
                    pninfo.payload['memory_free'])
8607
        if miss_mem > 0:
8608
          raise errors.OpPrereqError("This change will prevent the instance"
8609
                                     " from starting, due to %d MB of memory"
8610
                                     " missing on its primary node" % miss_mem,
8611
                                     errors.ECODE_NORES)
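        # Worked example with made-up numbers: raising the instance to
        # 2048 MB while it currently uses 512 MB and the primary node reports
        # 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512, which is > 0,
        # so the change is refused.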
8612

    
8613
      if be_new[constants.BE_AUTO_BALANCE]:
8614
        for node, nres in nodeinfo.items():
8615
          if node not in instance.secondary_nodes:
8616
            continue
8617
          msg = nres.fail_msg
8618
          if msg:
8619
            self.warn.append("Can't get info from secondary node %s: %s" %
8620
                             (node, msg))
8621
          elif not isinstance(nres.payload.get('memory_free', None), int):
8622
            self.warn.append("Secondary node %s didn't return free"
8623
                             " memory information" % node)
8624
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8625
            self.warn.append("Not enough memory to failover instance to"
8626
                             " secondary node %s" % node)
8627

    
8628
    # NIC processing
8629
    self.nic_pnew = {}
8630
    self.nic_pinst = {}
8631
    for nic_op, nic_dict in self.op.nics:
8632
      if nic_op == constants.DDM_REMOVE:
8633
        if not instance.nics:
8634
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8635
                                     errors.ECODE_INVAL)
8636
        continue
8637
      if nic_op != constants.DDM_ADD:
8638
        # an existing nic
8639
        if not instance.nics:
8640
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8641
                                     " no NICs" % nic_op,
8642
                                     errors.ECODE_INVAL)
8643
        if nic_op < 0 or nic_op >= len(instance.nics):
8644
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8645
                                     " are 0 to %d" %
8646
                                     (nic_op, len(instance.nics) - 1),
8647
                                     errors.ECODE_INVAL)
8648
        old_nic_params = instance.nics[nic_op].nicparams
8649
        old_nic_ip = instance.nics[nic_op].ip
8650
      else:
8651
        old_nic_params = {}
8652
        old_nic_ip = None
8653

    
8654
      update_params_dict = dict([(key, nic_dict[key])
8655
                                 for key in constants.NICS_PARAMETERS
8656
                                 if key in nic_dict])
8657

    
8658
      if 'bridge' in nic_dict:
8659
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8660

    
8661
      new_nic_params = _GetUpdatedParams(old_nic_params,
8662
                                         update_params_dict)
8663
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8664
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8665
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8666
      self.nic_pinst[nic_op] = new_nic_params
8667
      self.nic_pnew[nic_op] = new_filled_nic_params
8668
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8669

    
8670
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
8671
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8672
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8673
        if msg:
8674
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8675
          if self.op.force:
8676
            self.warn.append(msg)
8677
          else:
8678
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8679
      if new_nic_mode == constants.NIC_MODE_ROUTED:
8680
        if 'ip' in nic_dict:
8681
          nic_ip = nic_dict['ip']
8682
        else:
8683
          nic_ip = old_nic_ip
8684
        if nic_ip is None:
8685
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8686
                                     ' on a routed nic', errors.ECODE_INVAL)
8687
      if 'mac' in nic_dict:
8688
        nic_mac = nic_dict['mac']
8689
        if nic_mac is None:
8690
          raise errors.OpPrereqError('Cannot set the nic mac to None',
8691
                                     errors.ECODE_INVAL)
8692
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8693
          # otherwise generate the mac
8694
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8695
        else:
8696
          # or validate/reserve the current one
8697
          try:
8698
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8699
          except errors.ReservationError:
8700
            raise errors.OpPrereqError("MAC address %s already in use"
8701
                                       " in cluster" % nic_mac,
8702
                                       errors.ECODE_NOTUNIQUE)
8703

    
8704
    # DISK processing
8705
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8706
      raise errors.OpPrereqError("Disk operations not supported for"
8707
                                 " diskless instances",
8708
                                 errors.ECODE_INVAL)
8709
    for disk_op, _ in self.op.disks:
8710
      if disk_op == constants.DDM_REMOVE:
8711
        if len(instance.disks) == 1:
8712
          raise errors.OpPrereqError("Cannot remove the last disk of"
8713
                                     " an instance", errors.ECODE_INVAL)
8714
        _CheckInstanceDown(self, instance, "cannot remove disks")
8715

    
8716
      if (disk_op == constants.DDM_ADD and
8717
          len(instance.disks) >= constants.MAX_DISKS):
8718
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8719
                                   " add more" % constants.MAX_DISKS,
8720
                                   errors.ECODE_STATE)
8721
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8722
        # an existing disk
8723
        if disk_op < 0 or disk_op >= len(instance.disks):
8724
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
8725
                                     " are 0 to %d" %
8726
                                     (disk_op, len(instance.disks) - 1),
8727
                                     errors.ECODE_INVAL)
8728

    
8729
    return
8730

    
8731
  def _ConvertPlainToDrbd(self, feedback_fn):
8732
    """Converts an instance from plain to drbd.
8733

8734
    """
8735
    feedback_fn("Converting template to drbd")
8736
    instance = self.instance
8737
    pnode = instance.primary_node
8738
    snode = self.op.remote_node
8739

    
8740
    # create a fake disk info for _GenerateDiskTemplate
8741
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8742
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8743
                                      instance.name, pnode, [snode],
8744
                                      disk_info, None, None, 0)
8745
    info = _GetInstanceInfoText(instance)
8746
    feedback_fn("Creating aditional volumes...")
8747
    # first, create the missing data and meta devices
8748
    for disk in new_disks:
8749
      # unfortunately this is... not too nice
8750
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8751
                            info, True)
8752
      for child in disk.children:
8753
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8754
    # at this stage, all new LVs have been created, we can rename the
8755
    # old ones
8756
    feedback_fn("Renaming original volumes...")
8757
    rename_list = [(o, n.children[0].logical_id)
8758
                   for (o, n) in zip(instance.disks, new_disks)]
8759
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8760
    result.Raise("Failed to rename original LVs")
8761

    
8762
    feedback_fn("Initializing DRBD devices...")
8763
    # all child devices are in place, we can now create the DRBD devices
8764
    for disk in new_disks:
8765
      for node in [pnode, snode]:
8766
        f_create = node == pnode
8767
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8768

    
8769
    # at this point, the instance has been modified
8770
    instance.disk_template = constants.DT_DRBD8
8771
    instance.disks = new_disks
8772
    self.cfg.Update(instance, feedback_fn)
8773

    
8774
    # disks are created, waiting for sync
8775
    disk_abort = not _WaitForSync(self, instance)
8776
    if disk_abort:
8777
      raise errors.OpExecError("There are some degraded disks for"
8778
                               " this instance, please cleanup manually")
8779

    
8780
  def _ConvertDrbdToPlain(self, feedback_fn):
8781
    """Converts an instance from drbd to plain.
8782

8783
    """
8784
    instance = self.instance
8785
    assert len(instance.secondary_nodes) == 1
8786
    pnode = instance.primary_node
8787
    snode = instance.secondary_nodes[0]
8788
    feedback_fn("Converting template to plain")
8789

    
8790
    old_disks = instance.disks
8791
    new_disks = [d.children[0] for d in old_disks]
8792

    
8793
    # copy over size and mode
8794
    for parent, child in zip(old_disks, new_disks):
8795
      child.size = parent.size
8796
      child.mode = parent.mode
8797

    
8798
    # update instance structure
8799
    instance.disks = new_disks
8800
    instance.disk_template = constants.DT_PLAIN
8801
    self.cfg.Update(instance, feedback_fn)
8802

    
8803
    feedback_fn("Removing volumes on the secondary node...")
8804
    for disk in old_disks:
8805
      self.cfg.SetDiskID(disk, snode)
8806
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8807
      if msg:
8808
        self.LogWarning("Could not remove block device %s on node %s,"
8809
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8810

    
8811
    feedback_fn("Removing unneeded volumes on the primary node...")
8812
    for idx, disk in enumerate(old_disks):
8813
      meta = disk.children[1]
8814
      self.cfg.SetDiskID(meta, pnode)
8815
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8816
      if msg:
8817
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8818
                        " continuing anyway: %s", idx, pnode, msg)
8819

    
8820

    
8821
  def Exec(self, feedback_fn):
8822
    """Modifies an instance.
8823

8824
    All parameters take effect only at the next restart of the instance.
8825

8826
    """
8827
    # Process here the warnings from CheckPrereq, as we don't have a
8828
    # feedback_fn there.
8829
    for warn in self.warn:
8830
      feedback_fn("WARNING: %s" % warn)
8831

    
8832
    result = []
8833
    instance = self.instance
8834
    # disk changes
8835
    for disk_op, disk_dict in self.op.disks:
8836
      if disk_op == constants.DDM_REMOVE:
8837
        # remove the last disk
8838
        device = instance.disks.pop()
8839
        device_idx = len(instance.disks)
8840
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8841
          self.cfg.SetDiskID(disk, node)
8842
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8843
          if msg:
8844
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8845
                            " continuing anyway", device_idx, node, msg)
8846
        result.append(("disk/%d" % device_idx, "remove"))
8847
      elif disk_op == constants.DDM_ADD:
8848
        # add a new disk
8849
        if instance.disk_template == constants.DT_FILE:
8850
          file_driver, file_path = instance.disks[0].logical_id
8851
          file_path = os.path.dirname(file_path)
8852
        else:
8853
          file_driver = file_path = None
8854
        disk_idx_base = len(instance.disks)
8855
        new_disk = _GenerateDiskTemplate(self,
8856
                                         instance.disk_template,
8857
                                         instance.name, instance.primary_node,
8858
                                         instance.secondary_nodes,
8859
                                         [disk_dict],
8860
                                         file_path,
8861
                                         file_driver,
8862
                                         disk_idx_base)[0]
8863
        instance.disks.append(new_disk)
8864
        info = _GetInstanceInfoText(instance)
8865

    
8866
        logging.info("Creating volume %s for instance %s",
8867
                     new_disk.iv_name, instance.name)
8868
        # Note: this needs to be kept in sync with _CreateDisks
8869
        #HARDCODE
8870
        for node in instance.all_nodes:
8871
          f_create = node == instance.primary_node
8872
          try:
8873
            _CreateBlockDev(self, node, instance, new_disk,
8874
                            f_create, info, f_create)
8875
          except errors.OpExecError, err:
8876
            self.LogWarning("Failed to create volume %s (%s) on"
8877
                            " node %s: %s",
8878
                            new_disk.iv_name, new_disk, node, err)
8879
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8880
                       (new_disk.size, new_disk.mode)))
8881
      else:
8882
        # change a given disk
8883
        instance.disks[disk_op].mode = disk_dict['mode']
8884
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8885

    
8886
    if self.op.disk_template:
8887
      r_shut = _ShutdownInstanceDisks(self, instance)
8888
      if not r_shut:
8889
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8890
                                 " proceed with disk template conversion")
8891
      mode = (instance.disk_template, self.op.disk_template)
8892
      try:
8893
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8894
      except:
8895
        self.cfg.ReleaseDRBDMinors(instance.name)
8896
        raise
8897
      result.append(("disk_template", self.op.disk_template))
8898

    
8899
    # NIC changes
8900
    for nic_op, nic_dict in self.op.nics:
8901
      if nic_op == constants.DDM_REMOVE:
8902
        # remove the last nic
8903
        del instance.nics[-1]
8904
        result.append(("nic.%d" % len(instance.nics), "remove"))
8905
      elif nic_op == constants.DDM_ADD:
8906
        # mac and bridge should be set by now
8907
        mac = nic_dict['mac']
8908
        ip = nic_dict.get('ip', None)
8909
        nicparams = self.nic_pinst[constants.DDM_ADD]
8910
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8911
        instance.nics.append(new_nic)
8912
        result.append(("nic.%d" % (len(instance.nics) - 1),
8913
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8914
                       (new_nic.mac, new_nic.ip,
8915
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8916
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8917
                       )))
8918
      else:
8919
        for key in 'mac', 'ip':
8920
          if key in nic_dict:
8921
            setattr(instance.nics[nic_op], key, nic_dict[key])
8922
        if nic_op in self.nic_pinst:
8923
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8924
        for key, val in nic_dict.iteritems():
8925
          result.append(("nic.%s/%d" % (key, nic_op), val))
8926

    
8927
    # hvparams changes
8928
    if self.op.hvparams:
8929
      instance.hvparams = self.hv_inst
8930
      for key, val in self.op.hvparams.iteritems():
8931
        result.append(("hv/%s" % key, val))
8932

    
8933
    # beparams changes
8934
    if self.op.beparams:
8935
      instance.beparams = self.be_inst
8936
      for key, val in self.op.beparams.iteritems():
8937
        result.append(("be/%s" % key, val))
8938

    
8939
    # OS change
8940
    if self.op.os_name:
8941
      instance.os = self.op.os_name
8942

    
8943
    # osparams changes
8944
    if self.op.osparams:
8945
      instance.osparams = self.os_inst
8946
      for key, val in self.op.osparams.iteritems():
8947
        result.append(("os/%s" % key, val))
8948

    
8949
    self.cfg.Update(instance, feedback_fn)
8950

    
8951
    return result
8952

    
8953
  _DISK_CONVERSIONS = {
8954
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8955
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8956
    }
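
# Illustrative sketch (not used by LUSetInstanceParams): the class above
# dispatches disk template conversions through a dict keyed on
# (old_template, new_template).  The miniature version below shows the same
# pattern with placeholder template names and no-op conversion callbacks.
def _ExampleDispatchConversion(old_tmpl, new_tmpl):
  """Looks up and runs a conversion callback, mirroring _DISK_CONVERSIONS."""
  conversions = {
    ("plain", "drbd"): lambda: "converted plain -> drbd",
    ("drbd", "plain"): lambda: "converted drbd -> plain",
    }
  try:
    fn = conversions[(old_tmpl, new_tmpl)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" % (old_tmpl, new_tmpl))
  return fn()
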
8957

    
8958

    
8959
class LUQueryExports(NoHooksLU):
8960
  """Query the exports list
8961

8962
  """
8963
  _OP_REQP = ['nodes']
8964
  REQ_BGL = False
8965

    
8966
  def ExpandNames(self):
8967
    self.needed_locks = {}
8968
    self.share_locks[locking.LEVEL_NODE] = 1
8969
    if not self.op.nodes:
8970
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8971
    else:
8972
      self.needed_locks[locking.LEVEL_NODE] = \
8973
        _GetWantedNodes(self, self.op.nodes)
8974

    
8975
  def Exec(self, feedback_fn):
8976
    """Compute the list of all the exported system images.
8977

8978
    @rtype: dict
8979
    @return: a dictionary with the structure node->(export-list)
8980
        where export-list is a list of the instances exported on
8981
        that node.
8982

8983
    """
8984
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8985
    rpcresult = self.rpc.call_export_list(self.nodes)
8986
    result = {}
8987
    for node in rpcresult:
8988
      if rpcresult[node].fail_msg:
8989
        result[node] = False
8990
      else:
8991
        result[node] = rpcresult[node].payload
8992

    
8993
    return result
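
# Illustrative sketch (hypothetical helper): LUQueryExports.Exec above maps
# each node to its export list, or to False when the RPC to that node failed.
# This sketch flattens such a map into (node, export_name) pairs.
def _ExampleFlattenExports(export_map):
  """Returns sorted (node, export_name) pairs, skipping failed nodes."""
  pairs = []
  for node, exports in export_map.items():
    if exports is False:
      # the RPC to this node failed; there is nothing to report
      continue
    for name in exports:
      pairs.append((node, name))
  return sorted(pairs)
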
8994

    
8995

    
8996
class LUPrepareExport(NoHooksLU):
8997
  """Prepares an instance for an export and returns useful information.
8998

8999
  """
9000
  _OP_REQP = ["instance_name", "mode"]
9001
  REQ_BGL = False
9002

    
9003
  def CheckArguments(self):
9004
    """Check the arguments.
9005

9006
    """
9007
    _CheckExportMode(self.op.mode)
9008

    
9009
  def ExpandNames(self):
9010
    self._ExpandAndLockInstance()
9011

    
9012
  def CheckPrereq(self):
9013
    """Check prerequisites.
9014

9015
    """
9016
    instance_name = self.op.instance_name
9017

    
9018
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9019
    assert self.instance is not None, \
9020
          "Cannot retrieve locked instance %s" % self.op.instance_name
9021
    _CheckNodeOnline(self, self.instance.primary_node)
9022

    
9023
    self._cds = _GetClusterDomainSecret()
9024

    
9025
  def Exec(self, feedback_fn):
9026
    """Prepares an instance for an export.
9027

9028
    """
9029
    instance = self.instance
9030

    
9031
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9032
      salt = utils.GenerateSecret(8)
9033

    
9034
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9035
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9036
                                              constants.RIE_CERT_VALIDITY)
9037
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9038

    
9039
      (name, cert_pem) = result.payload
9040

    
9041
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9042
                                             cert_pem)
9043

    
9044
      return {
9045
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9046
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9047
                          salt),
9048
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9049
        }
9050

    
9051
    return None
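
# Illustrative sketch: the (name, hmac, salt) triple returned above is later
# checked by LUExportInstance.CheckPrereq via utils.VerifySha1Hmac.  The
# stand-alone helpers below show the general salted-HMAC idea using only the
# standard library; they are an assumption for illustration, not the actual
# implementation in lib/utils.py.
def _ExampleSignKeyName(secret, name, salt):
  """Returns a hex HMAC-SHA1 digest of the salted key name."""
  import hmac
  import hashlib
  return hmac.new(secret, salt + name, hashlib.sha1).hexdigest()

def _ExampleVerifyKeyName(secret, name, digest, salt):
  """Checks a digest produced by _ExampleSignKeyName."""
  return _ExampleSignKeyName(secret, name, salt) == digest
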
9052

    
9053

    
9054
class LUExportInstance(LogicalUnit):
9055
  """Export an instance to an image in the cluster.
9056

9057
  """
9058
  HPATH = "instance-export"
9059
  HTYPE = constants.HTYPE_INSTANCE
9060
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
9061
  _OP_DEFS = [
9062
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9063
    ("remove_instance", False),
9064
    ("ignore_remove_failures", False),
9065
    ("mode", constants.EXPORT_MODE_LOCAL),
9066
    ("x509_key_name", None),
9067
    ("destination_x509_ca", None),
9068
    ]
9069
  REQ_BGL = False
9070

    
9071
  def CheckArguments(self):
9072
    """Check the arguments.
9073

9074
    """
9075
    self.x509_key_name = self.op.x509_key_name
9076
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9077

    
9078
    if self.op.remove_instance and not self.op.shutdown:
9079
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9080
                                 " down before")
9081

    
9082
    _CheckExportMode(self.op.mode)
9083

    
9084
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9085
      if not self.x509_key_name:
9086
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9087
                                   errors.ECODE_INVAL)
9088

    
9089
      if not self.dest_x509_ca_pem:
9090
        raise errors.OpPrereqError("Missing destination X509 CA",
9091
                                   errors.ECODE_INVAL)
9092

    
9093
  def ExpandNames(self):
9094
    self._ExpandAndLockInstance()
9095

    
9096
    # Lock all nodes for local exports
9097
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9098
      # FIXME: lock only instance primary and destination node
9099
      #
9100
      # Sad but true, for now we have to lock all nodes, as we don't know where
9101
      # the previous export might be, and in this LU we search for it and
9102
      # remove it from its current node. In the future we could fix this by:
9103
      #  - making a tasklet to search (share-lock all), then create the
9104
      #    new one, then one to remove, after
9105
      #  - removing the removal operation altogether
9106
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9107

    
9108
  def DeclareLocks(self, level):
9109
    """Last minute lock declaration."""
9110
    # All nodes are locked anyway, so nothing to do here.
9111

    
9112
  def BuildHooksEnv(self):
9113
    """Build hooks env.
9114

9115
    This will run on the master, primary node and target node.
9116

9117
    """
9118
    env = {
9119
      "EXPORT_MODE": self.op.mode,
9120
      "EXPORT_NODE": self.op.target_node,
9121
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9122
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9123
      # TODO: Generic function for boolean env variables
9124
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9125
      }
9126

    
9127
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9128

    
9129
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9130

    
9131
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9132
      nl.append(self.op.target_node)
9133

    
9134
    return env, nl, nl
9135

    
9136
  def CheckPrereq(self):
9137
    """Check prerequisites.
9138

9139
    This checks that the instance and node names are valid.
9140

9141
    """
9142
    instance_name = self.op.instance_name
9143

    
9144
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9145
    assert self.instance is not None, \
9146
          "Cannot retrieve locked instance %s" % self.op.instance_name
9147
    _CheckNodeOnline(self, self.instance.primary_node)
9148

    
9149
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9150
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9151
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9152
      assert self.dst_node is not None
9153

    
9154
      _CheckNodeOnline(self, self.dst_node.name)
9155
      _CheckNodeNotDrained(self, self.dst_node.name)
9156

    
9157
      self._cds = None
9158
      self.dest_disk_info = None
9159
      self.dest_x509_ca = None
9160

    
9161
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9162
      self.dst_node = None
9163

    
9164
      if len(self.op.target_node) != len(self.instance.disks):
9165
        raise errors.OpPrereqError(("Received destination information for %s"
9166
                                    " disks, but instance %s has %s disks") %
9167
                                   (len(self.op.target_node), instance_name,
9168
                                    len(self.instance.disks)),
9169
                                   errors.ECODE_INVAL)
9170

    
9171
      cds = _GetClusterDomainSecret()
9172

    
9173
      # Check X509 key name
9174
      try:
9175
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9176
      except (TypeError, ValueError), err:
9177
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9178

    
9179
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9180
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9181
                                   errors.ECODE_INVAL)
9182

    
9183
      # Load and verify CA
9184
      try:
9185
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9186
      except OpenSSL.crypto.Error, err:
9187
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9188
                                   (err, ), errors.ECODE_INVAL)
9189

    
9190
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9191
      if errcode is not None:
9192
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9193
                                   (msg, ), errors.ECODE_INVAL)
9194

    
9195
      self.dest_x509_ca = cert
9196

    
9197
      # Verify target information
9198
      disk_info = []
9199
      for idx, disk_data in enumerate(self.op.target_node):
9200
        try:
9201
          (host, port, magic) = \
9202
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9203
        except errors.GenericError, err:
9204
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9205
                                     (idx, err), errors.ECODE_INVAL)
9206

    
9207
        disk_info.append((host, port, magic))
9208

    
9209
      assert len(disk_info) == len(self.op.target_node)
9210
      self.dest_disk_info = disk_info
9211

    
9212
    else:
9213
      raise errors.ProgrammerError("Unhandled export mode %r" %
9214
                                   self.op.mode)
9215

    
9216
    # instance disk type verification
9217
    # TODO: Implement export support for file-based disks
9218
    for disk in self.instance.disks:
9219
      if disk.dev_type == constants.LD_FILE:
9220
        raise errors.OpPrereqError("Export not supported for instances with"
9221
                                   " file-based disks", errors.ECODE_INVAL)
9222

    
9223
  def _CleanupExports(self, feedback_fn):
9224
    """Removes exports of current instance from all other nodes.
9225

9226
    If an instance in a cluster with nodes A..D was exported to node C, its
9227
    exports will be removed from the nodes A, B and D.
9228

9229
    """
9230
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
9231

    
9232
    nodelist = self.cfg.GetNodeList()
9233
    nodelist.remove(self.dst_node.name)
9234

    
9235
    # on one-node clusters nodelist will be empty after the removal
9236
    # if we proceeded, the backup would be removed because OpQueryExports
9237
    # substitutes an empty list with the full cluster node list.
9238
    iname = self.instance.name
9239
    if nodelist:
9240
      feedback_fn("Removing old exports for instance %s" % iname)
9241
      exportlist = self.rpc.call_export_list(nodelist)
9242
      for node in exportlist:
9243
        if exportlist[node].fail_msg:
9244
          continue
9245
        if iname in exportlist[node].payload:
9246
          msg = self.rpc.call_export_remove(node, iname).fail_msg
9247
          if msg:
9248
            self.LogWarning("Could not remove older export for instance %s"
9249
                            " on node %s: %s", iname, node, msg)
9250

    
9251
  def Exec(self, feedback_fn):
9252
    """Export an instance to an image in the cluster.
9253

9254
    """
9255
    assert self.op.mode in constants.EXPORT_MODES
9256

    
9257
    instance = self.instance
9258
    src_node = instance.primary_node
9259

    
9260
    if self.op.shutdown:
9261
      # shutdown the instance, but not the disks
9262
      feedback_fn("Shutting down instance %s" % instance.name)
9263
      result = self.rpc.call_instance_shutdown(src_node, instance,
9264
                                               self.op.shutdown_timeout)
9265
      # TODO: Maybe ignore failures if ignore_remove_failures is set
9266
      result.Raise("Could not shutdown instance %s on"
9267
                   " node %s" % (instance.name, src_node))
9268

    
9269
    # set the disks ID correctly since call_instance_start needs the
9270
    # correct drbd minor to create the symlinks
9271
    for disk in instance.disks:
9272
      self.cfg.SetDiskID(disk, src_node)
9273

    
9274
    activate_disks = (not instance.admin_up)
9275

    
9276
    if activate_disks:
9277
      # Activate the instance disks if we're exporting a stopped instance
9278
      feedback_fn("Activating disks for %s" % instance.name)
9279
      _StartInstanceDisks(self, instance, None)
9280

    
9281
    try:
9282
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9283
                                                     instance)
9284

    
9285
      helper.CreateSnapshots()
9286
      try:
9287
        if (self.op.shutdown and instance.admin_up and
9288
            not self.op.remove_instance):
9289
          assert not activate_disks
9290
          feedback_fn("Starting instance %s" % instance.name)
9291
          result = self.rpc.call_instance_start(src_node, instance, None, None)
9292
          msg = result.fail_msg
9293
          if msg:
9294
            feedback_fn("Failed to start instance: %s" % msg)
9295
            _ShutdownInstanceDisks(self, instance)
9296
            raise errors.OpExecError("Could not start instance: %s" % msg)
9297

    
9298
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
9299
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9300
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9301
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
9302
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9303

    
9304
          (key_name, _, _) = self.x509_key_name
9305

    
9306
          dest_ca_pem = \
9307
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9308
                                            self.dest_x509_ca)
9309

    
9310
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9311
                                                     key_name, dest_ca_pem,
9312
                                                     timeouts)
9313
      finally:
9314
        helper.Cleanup()
9315

    
9316
      # Check for backwards compatibility
9317
      assert len(dresults) == len(instance.disks)
9318
      assert compat.all(isinstance(i, bool) for i in dresults), \
9319
             "Not all results are boolean: %r" % dresults
9320

    
9321
    finally:
9322
      if activate_disks:
9323
        feedback_fn("Deactivating disks for %s" % instance.name)
9324
        _ShutdownInstanceDisks(self, instance)
9325

    
9326
    # Remove instance if requested
9327
    if self.op.remove_instance:
9328
      if not (compat.all(dresults) and fin_resu):
9329
        feedback_fn("Not removing instance %s as parts of the export failed" %
9330
                    instance.name)
9331
      else:
9332
        feedback_fn("Removing instance %s" % instance.name)
9333
        _RemoveInstance(self, feedback_fn, instance,
9334
                        self.op.ignore_remove_failures)
9335

    
9336
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9337
      self._CleanupExports(feedback_fn)
9338

    
9339
    return fin_resu, dresults
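
# Illustrative sketch (hypothetical helper): Exec above returns the overall
# export status plus one boolean per disk, and the instance is only removed
# when every part succeeded.  The same check, in isolation:
def _ExampleExportFullySuccessful(fin_resu, dresults):
  """True only if the export and every per-disk transfer succeeded."""
  return bool(fin_resu) and all(bool(d) for d in dresults)
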
9340

    
9341

    
9342
class LURemoveExport(NoHooksLU):
9343
  """Remove exports related to the named instance.
9344

9345
  """
9346
  _OP_REQP = ["instance_name"]
9347
  REQ_BGL = False
9348

    
9349
  def ExpandNames(self):
9350
    self.needed_locks = {}
9351
    # We need all nodes to be locked in order for RemoveExport to work, but we
9352
    # don't need to lock the instance itself, as nothing will happen to it (and
9353
    # we can remove exports also for a removed instance)
9354
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9355

    
9356
  def Exec(self, feedback_fn):
9357
    """Remove any export.
9358

9359
    """
9360
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9361
    # If the instance was not found we'll try with the name that was passed in.
9362
    # This will only work if it was an FQDN, though.
9363
    fqdn_warn = False
9364
    if not instance_name:
9365
      fqdn_warn = True
9366
      instance_name = self.op.instance_name
9367

    
9368
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9369
    exportlist = self.rpc.call_export_list(locked_nodes)
9370
    found = False
9371
    for node in exportlist:
9372
      msg = exportlist[node].fail_msg
9373
      if msg:
9374
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9375
        continue
9376
      if instance_name in exportlist[node].payload:
9377
        found = True
9378
        result = self.rpc.call_export_remove(node, instance_name)
9379
        msg = result.fail_msg
9380
        if msg:
9381
          logging.error("Could not remove export for instance %s"
9382
                        " on node %s: %s", instance_name, node, msg)
9383

    
9384
    if fqdn_warn and not found:
9385
      feedback_fn("Export not found. If trying to remove an export belonging"
9386
                  " to a deleted instance please use its Fully Qualified"
9387
                  " Domain Name.")
9388

    
9389

    
9390
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9391
  """Generic tags LU.
9392

9393
  This is an abstract class which is the parent of all the other tags LUs.
9394

9395
  """
9396

    
9397
  def ExpandNames(self):
9398
    self.needed_locks = {}
9399
    if self.op.kind == constants.TAG_NODE:
9400
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9401
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
9402
    elif self.op.kind == constants.TAG_INSTANCE:
9403
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9404
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9405

    
9406
  def CheckPrereq(self):
9407
    """Check prerequisites.
9408

9409
    """
9410
    if self.op.kind == constants.TAG_CLUSTER:
9411
      self.target = self.cfg.GetClusterInfo()
9412
    elif self.op.kind == constants.TAG_NODE:
9413
      self.target = self.cfg.GetNodeInfo(self.op.name)
9414
    elif self.op.kind == constants.TAG_INSTANCE:
9415
      self.target = self.cfg.GetInstanceInfo(self.op.name)
9416
    else:
9417
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9418
                                 str(self.op.kind), errors.ECODE_INVAL)
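
# Illustrative sketch (hypothetical helper): TagsLU.CheckPrereq above picks
# its target object from the (kind, name) pair.  The same selection can be
# written as a small dispatch table; plain strings such as "cluster", "node"
# and "instance" stand in for the TAG_* constants here.
def _ExampleResolveTagTarget(kind, name, getters):
  """Returns the taggable object for kind via a {kind: callable} map.

  The callables are assumed to take the object name as their only argument.

  """
  if kind not in getters:
    raise ValueError("Wrong tag type requested (%s)" % kind)
  return getters[kind](name)
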
9419

    
9420

    
9421
class LUGetTags(TagsLU):
9422
  """Returns the tags of a given object.
9423

9424
  """
9425
  _OP_REQP = ["kind", "name"]
9426
  REQ_BGL = False
9427

    
9428
  def Exec(self, feedback_fn):
9429
    """Returns the tag list.
9430

9431
    """
9432
    return list(self.target.GetTags())
9433

    
9434

    
9435
class LUSearchTags(NoHooksLU):
9436
  """Searches the tags for a given pattern.
9437

9438
  """
9439
  _OP_REQP = ["pattern"]
9440
  REQ_BGL = False
9441

    
9442
  def ExpandNames(self):
9443
    self.needed_locks = {}
9444

    
9445
  def CheckPrereq(self):
9446
    """Check prerequisites.
9447

9448
    This checks the pattern passed for validity by compiling it.
9449

9450
    """
9451
    try:
9452
      self.re = re.compile(self.op.pattern)
9453
    except re.error, err:
9454
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9455
                                 (self.op.pattern, err), errors.ECODE_INVAL)
9456

    
9457
  def Exec(self, feedback_fn):
9458
    """Returns the tag list.
9459

9460
    """
9461
    cfg = self.cfg
9462
    tgts = [("/cluster", cfg.GetClusterInfo())]
9463
    ilist = cfg.GetAllInstancesInfo().values()
9464
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9465
    nlist = cfg.GetAllNodesInfo().values()
9466
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9467
    results = []
9468
    for path, target in tgts:
9469
      for tag in target.GetTags():
9470
        if self.re.search(tag):
9471
          results.append((path, tag))
9472
    return results
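
# Illustrative sketch (hypothetical helper): the same path/tag scan as
# LUSearchTags.Exec, but over a plain {path: tag-iterable} mapping so the
# pattern matching can be exercised without a cluster configuration.
def _ExampleSearchTags(pattern, tags_by_path):
  """Returns sorted (path, tag) pairs whose tag matches the regex pattern."""
  regex = re.compile(pattern)
  return sorted((path, tag)
                for (path, tags) in tags_by_path.items()
                for tag in tags
                if regex.search(tag))
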
9473

    
9474

    
9475
class LUAddTags(TagsLU):
9476
  """Sets a tag on a given object.
9477

9478
  """
9479
  _OP_REQP = ["kind", "name", "tags"]
9480
  REQ_BGL = False
9481

    
9482
  def CheckPrereq(self):
9483
    """Check prerequisites.
9484

9485
    This checks the type and length of the tag name and value.
9486

9487
    """
9488
    TagsLU.CheckPrereq(self)
9489
    for tag in self.op.tags:
9490
      objects.TaggableObject.ValidateTag(tag)
9491

    
9492
  def Exec(self, feedback_fn):
9493
    """Sets the tag.
9494

9495
    """
9496
    try:
9497
      for tag in self.op.tags:
9498
        self.target.AddTag(tag)
9499
    except errors.TagError, err:
9500
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9501
    self.cfg.Update(self.target, feedback_fn)
9502

    
9503

    
9504
class LUDelTags(TagsLU):
9505
  """Delete a list of tags from a given object.
9506

9507
  """
9508
  _OP_REQP = ["kind", "name", "tags"]
9509
  REQ_BGL = False
9510

    
9511
  def CheckPrereq(self):
9512
    """Check prerequisites.
9513

9514
    This checks that we have the given tag.
9515

9516
    """
9517
    TagsLU.CheckPrereq(self)
9518
    for tag in self.op.tags:
9519
      objects.TaggableObject.ValidateTag(tag)
9520
    del_tags = frozenset(self.op.tags)
9521
    cur_tags = self.target.GetTags()
9522
    if not del_tags <= cur_tags:
9523
      diff_tags = del_tags - cur_tags
9524
      diff_names = ["'%s'" % tag for tag in diff_tags]
9525
      diff_names.sort()
9526
      raise errors.OpPrereqError("Tag(s) %s not found" %
9527
                                 (",".join(diff_names)), errors.ECODE_NOENT)
9528

    
9529
  def Exec(self, feedback_fn):
9530
    """Remove the tag from the object.
9531

9532
    """
9533
    for tag in self.op.tags:
9534
      self.target.RemoveTag(tag)
9535
    self.cfg.Update(self.target, feedback_fn)
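
# Illustrative sketch (hypothetical helper): LUDelTags.CheckPrereq rejects the
# request unless the tags to delete form a subset of the current tags.  This
# helper just returns the offending tags for such a check.
def _ExampleMissingTags(requested, current):
  """Returns the sorted list of requested tags not present in current."""
  return sorted(frozenset(requested) - frozenset(current))
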
9536

    
9537

    
9538
class LUTestDelay(NoHooksLU):
9539
  """Sleep for a specified amount of time.
9540

9541
  This LU sleeps on the master and/or nodes for a specified amount of
9542
  time.
9543

9544
  """
9545
  _OP_REQP = ["duration", "on_master", "on_nodes"]
9546
  REQ_BGL = False
9547

    
9548
  def CheckArguments(self):
9549
    # TODO: convert to the type system
9550
    self.op.repeat = getattr(self.op, "repeat", 0)
9551
    if self.op.repeat < 0:
9552
      raise errors.OpPrereqError("Repetition count cannot be negative")
9553

    
9554
  def ExpandNames(self):
9555
    """Expand names and set required locks.
9556

9557
    This expands the node list, if any.
9558

9559
    """
9560
    self.needed_locks = {}
9561
    if self.op.on_nodes:
9562
      # _GetWantedNodes can be used here, but is not always appropriate to use
9563
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9564
      # more information.
9565
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9566
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9567

    
9568
  def _TestDelay(self):
9569
    """Do the actual sleep.
9570

9571
    """
9572
    if self.op.on_master:
9573
      if not utils.TestDelay(self.op.duration):
9574
        raise errors.OpExecError("Error during master delay test")
9575
    if self.op.on_nodes:
9576
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9577
      for node, node_result in result.items():
9578
        node_result.Raise("Failure during rpc call to node %s" % node)
9579

    
9580
  def Exec(self, feedback_fn):
9581
    """Execute the test delay opcode, with the wanted repetitions.
9582

9583
    """
9584
    if self.op.repeat == 0:
9585
      self._TestDelay()
9586
    else:
9587
      top_value = self.op.repeat - 1
9588
      for i in range(self.op.repeat):
9589
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9590
        self._TestDelay()
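
# Illustrative sketch (hypothetical helper): LUTestDelay runs the delay once
# when repeat is 0 and exactly repeat times otherwise; this helper restates
# that rule so the slightly surprising "0 means once" behaviour is explicit.
def _ExampleEffectiveRepeatCount(repeat):
  """Number of delay runs LUTestDelay.Exec performs for a given repeat."""
  if repeat < 0:
    raise ValueError("Repetition count cannot be negative")
  if repeat == 0:
    return 1
  return repeat
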
9591

    
9592

    
9593
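# Illustrative usage sketch (editor's addition, not part of the original
# module): logical units drive this class roughly as LUTestAllocator.Exec
# does further below; the instance/node names and the allocator script name
# used here are made-up placeholders.
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("my-allocator")
#   if not ial.success:
#     raise errors.OpPrereqError("Allocation failed: %s" % ial.info,
#                                errors.ECODE_NORES)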
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

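  # Rough shape of the assembled input (editor's sketch; the field values
  # are invented, the keys mirror the code above and _BuildInputData below):
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodes": {"node1.example.com": {"total_memory": 4096, ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": 512, ...}, ...},
  #    "request": {"type": self.mode, "name": "inst1.example.com",
  #                "required_nodes": 2, ...}}
  # The whole structure is serialized into self.in_text and handed to the
  # external script by Run().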
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

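  # A well-formed reply from the external script is expected to look roughly
  # like the following (editor's sketch; the node names are invented):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # Older scripts may return the node list under "nodes" instead of
  # "result"; _ValidateResult translates that for backwards compatibility.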
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


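# Editor's note on the two test directions handled below: with
# constants.IALLOCATOR_DIR_IN the LU only builds the request and returns the
# serialized input text, while with constants.IALLOCATOR_DIR_OUT it actually
# runs the named allocator script and returns its raw (unvalidated) output.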
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result