#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import tempfile
import re
import platform
import logging
import copy
import random

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass
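
  # Illustrative sketch (not part of the original source): a typical
  # CheckArguments override normalizes opcode attributes before any locks
  # are taken.  The attribute name used below is hypothetical.
  #
  #   def CheckArguments(self):
  #     if not hasattr(self.op, "force"):
  #       self.op.force = False
  #     if not isinstance(self.op.force, bool):
  #       raise errors.OpPrereqError("Parameter 'force' must be a boolean")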

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If the hook should run on no nodes, an empty list (and not None) should
    be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
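
  # Illustrative sketch (not in the original source) of the three-element
  # tuple BuildHooksEnv is expected to return; the environment keys shown
  # are examples only:
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.cfg.GetClusterName()}
  #     mn = self.cfg.GetMasterNode()
  #     return env, [mn], [mn]   # env, pre-hook nodes, post-hook nodes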

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
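
  # Illustrative sketch (not in the original source): an instance-level LU
  # normally combines the two helpers above in ExpandNames, paired with the
  # DeclareLocks override sketched earlier:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE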


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)
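
# Illustrative sketch (not in the original source): LUs would call this
# helper from CheckPrereq or CheckArguments to normalize an optional boolean
# opcode field, e.g. (the field name is purely illustrative):
#
#   _CheckBooleanOpField(self.op, "use_locking")
#
# after which the attribute is guaranteed to exist and to be either None or
# a bool.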


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  return env
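
# Illustrative sketch (not in the original source): for an instance with one
# bridged NIC and one disk, the function above produces an environment along
# these lines (all values are made up for illustration):
#
#   {"OP_TARGET": "inst1", "INSTANCE_NAME": "inst1",
#    "INSTANCE_PRIMARY": "node1", "INSTANCE_SECONDARIES": "node2",
#    "INSTANCE_OS_TYPE": "debootstrap", "INSTANCE_STATUS": "up",
#    "INSTANCE_MEMORY": 128, "INSTANCE_VCPUS": 1,
#    "INSTANCE_DISK_TEMPLATE": "drbd",
#    "INSTANCE_NIC_COUNT": 1, "INSTANCE_NIC0_IP": "",
#    "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#    "INSTANCE_NIC0_MODE": "bridged", "INSTANCE_NIC0_LINK": "xen-br0",
#    "INSTANCE_NIC0_BRIDGE": "xen-br0",
#    "INSTANCE_DISK_COUNT": 1, "INSTANCE_DISK0_SIZE": 10240,
#    "INSTANCE_DISK0_MODE": "rw"}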

def _PreBuildNICHooksList(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _PreBuildNICHooksList(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpPrereqError("Error checking bridges on destination node"
                                 " '%s': %s" % (target_node, msg))


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise()
    if not result.data:
      raise errors.OpExecError("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False
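
  # Note (added comment): with REQ_BGL = False this LU runs concurrently; it
  # only reads the configuration, so ExpandNames below acquires all node and
  # instance locks in shared mode.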

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
                        " '%s'" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as a secondary
      # has enough memory to host all instances it is secondary for, should
      # a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad
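
  # Illustrative sketch (not in the original source) of the N+1 check above,
  # with made-up numbers: if node B is secondary for instances i1 (256 MB)
  # and i2 (512 MB) whose primary is node A, then B needs
  # needed_mem = 256 + 512 = 768 MB of free memory ('mfree') to absorb a
  # failure of A; only auto-balanced instances are counted.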

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure causes
    the output to be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name
      nresult = all_nvinfo[node].data

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      if all_nvinfo[node].failed or not isinstance(nresult, dict):
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
        bad = True
        continue

      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result = self._VerifyInstance(instance, inst_config, node_volume,
                                    node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          if res.failed or res.data is False or not isinstance(res.data, list):
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution")
            lu_result = 1
            continue
          for script, hkr, output in res.data:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_volume_list(nodes, vg_name)

    to_act = set()
    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.RemoteFailMsg()
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
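
  # Illustrative sketch (not in the original source): after the loop above,
  # nv_dict maps (node, volume) pairs to instance objects, e.g.
  # {("node1", "xenvg/disk0"): <inst1>}.  Instances whose LV exists but is
  # not online end up in res_instances (they need activate-disks), while
  # anything still left in nv_dict is a missing LV recorded in res_missing.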
1348

    
1349

    
1350
class LURenameCluster(LogicalUnit):
1351
  """Rename the cluster.
1352

1353
  """
1354
  HPATH = "cluster-rename"
1355
  HTYPE = constants.HTYPE_CLUSTER
1356
  _OP_REQP = ["name"]
1357

    
1358
  def BuildHooksEnv(self):
1359
    """Build hooks env.
1360

1361
    """
1362
    env = {
1363
      "OP_TARGET": self.cfg.GetClusterName(),
1364
      "NEW_NAME": self.op.name,
1365
      }
1366
    mn = self.cfg.GetMasterNode()
1367
    return env, [mn], [mn]
1368

    
1369
  def CheckPrereq(self):
1370
    """Verify that the passed name is a valid one.
1371

1372
    """
1373
    hostname = utils.HostInfo(self.op.name)
1374

    
1375
    new_name = hostname.name
1376
    self.ip = new_ip = hostname.ip
1377
    old_name = self.cfg.GetClusterName()
1378
    old_ip = self.cfg.GetMasterIP()
1379
    if new_name == old_name and new_ip == old_ip:
1380
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1381
                                 " cluster has changed")
1382
    if new_ip != old_ip:
1383
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1384
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1385
                                   " reachable on the network. Aborting." %
1386
                                   new_ip)
1387

    
1388
    self.op.name = new_name
1389

    
1390
  def Exec(self, feedback_fn):
1391
    """Rename the cluster.
1392

1393
    """
1394
    clustername = self.op.name
1395
    ip = self.ip
1396

    
1397
    # shutdown the master IP
1398
    master = self.cfg.GetMasterNode()
1399
    result = self.rpc.call_node_stop_master(master, False)
1400
    if result.failed or not result.data:
1401
      raise errors.OpExecError("Could not disable the master role")
1402

    
1403
    try:
1404
      cluster = self.cfg.GetClusterInfo()
1405
      cluster.cluster_name = clustername
1406
      cluster.master_ip = ip
1407
      self.cfg.Update(cluster)
1408

    
1409
      # update the known hosts file
1410
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1411
      node_list = self.cfg.GetNodeList()
1412
      try:
1413
        node_list.remove(master)
1414
      except ValueError:
1415
        pass
1416
      result = self.rpc.call_upload_file(node_list,
1417
                                         constants.SSH_KNOWN_HOSTS_FILE)
1418
      for to_node, to_result in result.iteritems():
1419
         msg = to_result.RemoteFailMsg()
1420
         if msg:
1421
           msg = ("Copy of file %s to node %s failed: %s" %
1422
                   (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1423
           self.proc.LogWarning(msg)
1424

    
1425
    finally:
1426
      result = self.rpc.call_node_start_master(master, False)
1427
      if result.failed or not result.data:
1428
        self.LogWarning("Could not re-enable the master role on"
1429
                        " the master, please restart manually.")
1430

    
1431

    
1432
def _RecursiveCheckIfLVMBased(disk):
1433
  """Check if the given disk or its children are lvm-based.
1434

1435
  @type disk: L{objects.Disk}
1436
  @param disk: the disk to check
1437
  @rtype: booleean
1438
  @return: boolean indicating whether a LD_LV dev_type was found or not
1439

1440
  """
1441
  if disk.children:
1442
    for chdisk in disk.children:
1443
      if _RecursiveCheckIfLVMBased(chdisk):
1444
        return True
1445
  return disk.dev_type == constants.LD_LV
1446

    
1447

    
1448
class LUSetClusterParams(LogicalUnit):
1449
  """Change the parameters of the cluster.
1450

1451
  """
1452
  HPATH = "cluster-modify"
1453
  HTYPE = constants.HTYPE_CLUSTER
1454
  _OP_REQP = []
1455
  REQ_BGL = False
1456

    
1457
  def CheckArguments(self):
1458
    """Check parameters
1459

1460
    """
1461
    if not hasattr(self.op, "candidate_pool_size"):
1462
      self.op.candidate_pool_size = None
1463
    if self.op.candidate_pool_size is not None:
1464
      try:
1465
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1466
      except (ValueError, TypeError), err:
1467
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1468
                                   str(err))
1469
      if self.op.candidate_pool_size < 1:
1470
        raise errors.OpPrereqError("At least one master candidate needed")
1471

    
1472
  def ExpandNames(self):
1473
    # FIXME: in the future, other cluster parameters may not require
1474
    # checking on all nodes in order to be modified.
1475
    self.needed_locks = {
1476
      locking.LEVEL_NODE: locking.ALL_SET,
1477
    }
1478
    self.share_locks[locking.LEVEL_NODE] = 1
1479

    
1480
  def BuildHooksEnv(self):
1481
    """Build hooks env.
1482

1483
    """
1484
    env = {
1485
      "OP_TARGET": self.cfg.GetClusterName(),
1486
      "NEW_VG_NAME": self.op.vg_name,
1487
      }
1488
    mn = self.cfg.GetMasterNode()
1489
    return env, [mn], [mn]
1490

    
1491
  def CheckPrereq(self):
1492
    """Check prerequisites.
1493

1494
    This checks that the given parameters don't conflict and
1495
    that the given volume group is valid.
1496

1497
    """
1498
    if self.op.vg_name is not None and not self.op.vg_name:
1499
      instances = self.cfg.GetAllInstancesInfo().values()
1500
      for inst in instances:
1501
        for disk in inst.disks:
1502
          if _RecursiveCheckIfLVMBased(disk):
1503
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1504
                                       " lvm-based instances exist")
1505

    
1506
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1507

    
1508
    # if vg_name not None, checks given volume group on all nodes
1509
    if self.op.vg_name:
1510
      vglist = self.rpc.call_vg_list(node_list)
1511
      for node in node_list:
1512
        msg = vglist[node].RemoteFailMsg()
1513
        if msg:
1514
          # ignoring down node
1515
          self.LogWarning("Error while gathering data on node %s"
1516
                          " (ignoring node): %s", node, msg)
1517
          continue
1518
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1519
                                              self.op.vg_name,
1520
                                              constants.MIN_VG_SIZE)
1521
        if vgstatus:
1522
          raise errors.OpPrereqError("Error on node '%s': %s" %
1523
                                     (node, vgstatus))
1524

    
1525
    self.cluster = cluster = self.cfg.GetClusterInfo()
1526
    # validate params changes
1527
    if self.op.beparams:
1528
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1529
      self.new_beparams = objects.FillDict(
1530
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1531

    
1532
    if self.op.nicparams:
1533
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1534
      self.new_nicparams = objects.FillDict(
1535
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1536
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1537

    
1538
    # hypervisor list/parameters
1539
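    # start from a copy of the current cluster hvparams (FillDict with an
    # empty override dict effectively copies them) and merge in the
    # per-hypervisor overrides supplied in the opcode, if any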
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1540
    if self.op.hvparams:
1541
      if not isinstance(self.op.hvparams, dict):
1542
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1543
      for hv_name, hv_dict in self.op.hvparams.items():
1544
        if hv_name not in self.new_hvparams:
1545
          self.new_hvparams[hv_name] = hv_dict
1546
        else:
1547
          self.new_hvparams[hv_name].update(hv_dict)
1548

    
1549
    if self.op.enabled_hypervisors is not None:
1550
      self.hv_list = self.op.enabled_hypervisors
1551
    else:
1552
      self.hv_list = cluster.enabled_hypervisors
1553

    
1554
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1555
      # either the enabled list has changed, or the parameters have, validate
1556
      for hv_name, hv_params in self.new_hvparams.items():
1557
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1558
            (self.op.enabled_hypervisors and
1559
             hv_name in self.op.enabled_hypervisors)):
1560
          # either this is a new hypervisor, or its parameters have changed
1561
          hv_class = hypervisor.GetHypervisor(hv_name)
1562
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1563
          hv_class.CheckParameterSyntax(hv_params)
1564
          _CheckHVParams(self, node_list, hv_name, hv_params)
1565

    
1566
  def Exec(self, feedback_fn):
1567
    """Change the parameters of the cluster.
1568

1569
    """
1570
    if self.op.vg_name is not None:
1571
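      # an empty vg_name is normalized to None, which disables LVM-based
      # storage cluster-wide; CheckPrereq has already verified that no
      # LVM-based instances exist in that case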
      new_volume = self.op.vg_name
1572
      if not new_volume:
1573
        new_volume = None
1574
      if new_volume != self.cfg.GetVGName():
1575
        self.cfg.SetVGName(new_volume)
1576
      else:
1577
        feedback_fn("Cluster LVM configuration already in desired"
1578
                    " state, not changing")
1579
    if self.op.hvparams:
1580
      self.cluster.hvparams = self.new_hvparams
1581
    if self.op.enabled_hypervisors is not None:
1582
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1583
    if self.op.beparams:
1584
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1585
    if self.op.nicparams:
1586
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1587

    
1588
    if self.op.candidate_pool_size is not None:
1589
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1590

    
1591
    self.cfg.Update(self.cluster)
1592

    
1593
    # we want to update nodes after the cluster so that if any errors
1594
    # happen, we have recorded and saved the cluster info
1595
    if self.op.candidate_pool_size is not None:
1596
      _AdjustCandidatePool(self)
1597

    
1598

    
1599
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1600
  """Distribute additional files which are part of the cluster configuration.
1601

1602
  ConfigWriter takes care of distributing the config and ssconf files, but
1603
  there are more files which should be distributed to all nodes. This function
1604
  makes sure those are copied.
1605

1606
  @param lu: calling logical unit
1607
  @param additional_nodes: list of nodes not in the config to distribute to
1608

1609
  """
1610
  # 1. Gather target nodes
1611
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1612
  dist_nodes = lu.cfg.GetNodeList()
1613
  if additional_nodes is not None:
1614
    dist_nodes.extend(additional_nodes)
1615
  if myself.name in dist_nodes:
1616
    dist_nodes.remove(myself.name)
1617
  # 2. Gather files to distribute
1618
  dist_files = set([constants.ETC_HOSTS,
1619
                    constants.SSH_KNOWN_HOSTS_FILE,
1620
                    constants.RAPI_CERT_FILE,
1621
                    constants.RAPI_USERS_FILE,
1622
                   ])
1623

    
1624
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1625
  for hv_name in enabled_hypervisors:
1626
    hv_class = hypervisor.GetHypervisor(hv_name)
1627
    dist_files.update(hv_class.GetAncillaryFiles())
1628

    
1629
  # 3. Perform the files upload
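  # only files that actually exist on the master are pushed; optional
  # files (for example the RAPI users file) may be absent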
1630
  for fname in dist_files:
1631
    if os.path.exists(fname):
1632
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1633
      for to_node, to_result in result.items():
1634
        msg = to_result.RemoteFailMsg()
1635
        if msg:
1636
          msg = ("Copy of file %s to node %s failed: %s" %
1637
                 (fname, to_node, msg))
1638
          lu.proc.LogWarning(msg)
1639

    
1640

    
1641
class LURedistributeConfig(NoHooksLU):
1642
  """Force the redistribution of cluster configuration.
1643

1644
  This is a very simple LU.
1645

1646
  """
1647
  _OP_REQP = []
1648
  REQ_BGL = False
1649

    
1650
  def ExpandNames(self):
1651
    self.needed_locks = {
1652
      locking.LEVEL_NODE: locking.ALL_SET,
1653
    }
1654
    self.share_locks[locking.LEVEL_NODE] = 1
1655

    
1656
  def CheckPrereq(self):
1657
    """Check prerequisites.
1658

1659
    """
1660

    
1661
  def Exec(self, feedback_fn):
1662
    """Redistribute the configuration.
1663

1664
    """
1665
    self.cfg.Update(self.cfg.GetClusterInfo())
1666
    _RedistributeAncillaryFiles(self)
1667

    
1668

    
1669
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1670
  """Sleep and poll for an instance's disk to sync.
1671

1672
  """
1673
  if not instance.disks:
1674
    return True
1675

    
1676
  if not oneshot:
1677
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1678

    
1679
  node = instance.primary_node
1680

    
1681
  for dev in instance.disks:
1682
    lu.cfg.SetDiskID(dev, node)
1683

    
1684
  retries = 0
1685
  while True:
1686
    max_time = 0
1687
    done = True
1688
    cumul_degraded = False
1689
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1690
    msg = rstats.RemoteFailMsg()
1691
    if msg:
1692
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1693
      retries += 1
1694
      if retries >= 10:
1695
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1696
                                 " aborting." % node)
1697
      time.sleep(6)
1698
      continue
1699
    rstats = rstats.payload
1700
    retries = 0
1701
    for i, mstat in enumerate(rstats):
1702
      if mstat is None:
1703
        lu.LogWarning("Can't compute data for node %s/%s",
1704
                      node, instance.disks[i].iv_name)
1705
        continue
1706
      # we ignore the ldisk parameter
1707
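      # each mstat entry unpacks as: percentage of the resync done (None
      # if no resync is in progress), estimated seconds remaining (None if
      # unknown), the degraded flag, and the local-disk status, which is
      # not used here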
      perc_done, est_time, is_degraded, _ = mstat
1708
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1709
      if perc_done is not None:
1710
        done = False
1711
        if est_time is not None:
1712
          rem_time = "%d estimated seconds remaining" % est_time
1713
          max_time = est_time
1714
        else:
1715
          rem_time = "no time estimate"
1716
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1717
                        (instance.disks[i].iv_name, perc_done, rem_time))
1718
    if done or oneshot:
1719
      break
1720

    
1721
    time.sleep(min(60, max_time))
1722

    
1723
  if done:
1724
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1725
  return not cumul_degraded
1726

    
1727

    
1728
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1729
  """Check that mirrors are not degraded.
1730

1731
  The ldisk parameter, if True, will change the test from the
1732
  is_degraded attribute (which represents overall non-ok status for
1733
  the device(s)) to the ldisk (representing the local storage status).
1734

1735
  """
1736
  lu.cfg.SetDiskID(dev, node)
1737
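  # the status tuple returned by call_blockdev_find is indexed
  # positionally: field 5 holds the overall is_degraded flag and field 6
  # the local disk (ldisk) status, so idx selects which one is tested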
  if ldisk:
1738
    idx = 6
1739
  else:
1740
    idx = 5
1741

    
1742
  result = True
1743
  if on_primary or dev.AssembleOnSecondary():
1744
    rstats = lu.rpc.call_blockdev_find(node, dev)
1745
    msg = rstats.RemoteFailMsg()
1746
    if msg:
1747
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1748
      result = False
1749
    elif not rstats.payload:
1750
      lu.LogWarning("Can't find disk on node %s", node)
1751
      result = False
1752
    else:
1753
      result = result and (not rstats.payload[idx])
1754
  if dev.children:
1755
    for child in dev.children:
1756
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1757

    
1758
  return result
1759

    
1760

    
1761
class LUDiagnoseOS(NoHooksLU):
1762
  """Logical unit for OS diagnose/query.
1763

1764
  """
1765
  _OP_REQP = ["output_fields", "names"]
1766
  REQ_BGL = False
1767
  _FIELDS_STATIC = utils.FieldSet()
1768
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1769

    
1770
  def ExpandNames(self):
1771
    if self.op.names:
1772
      raise errors.OpPrereqError("Selective OS query not supported")
1773

    
1774
    _CheckOutputFields(static=self._FIELDS_STATIC,
1775
                       dynamic=self._FIELDS_DYNAMIC,
1776
                       selected=self.op.output_fields)
1777

    
1778
    # Lock all nodes, in shared mode
1779
    # Temporary removal of locks, should be reverted later
1780
    # TODO: reintroduce locks when they are lighter-weight
1781
    self.needed_locks = {}
1782
    #self.share_locks[locking.LEVEL_NODE] = 1
1783
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1784

    
1785
  def CheckPrereq(self):
1786
    """Check prerequisites.
1787

1788
    """
1789

    
1790
  @staticmethod
1791
  def _DiagnoseByOS(node_list, rlist):
1792
    """Remaps a per-node return list into an a per-os per-node dictionary
1793

1794
    @param node_list: a list with the names of all nodes
1795
    @param rlist: a map with node names as keys and OS objects as values
1796

1797
    @rtype: dict
1798
    @return: a dictionary with OS names as keys and, as values, another map
1799
        with nodes as keys and lists of OS objects as values, eg::
1800

1801
          {"debian-etch": {"node1": [<object>,...],
1802
                           "node2": [<object>,]}
1803
          }
1804

1805
    """
1806
    all_os = {}
1807
    # we build here the list of nodes that didn't fail the RPC (at RPC
1808
    # level), so that nodes with a non-responding node daemon don't
1809
    # make all OSes invalid
1810
    good_nodes = [node_name for node_name in rlist
1811
                  if not rlist[node_name].failed]
1812
    for node_name, nr in rlist.iteritems():
1813
      if nr.failed or not nr.data:
1814
        continue
1815
      for os_obj in nr.data:
1816
        if os_obj.name not in all_os:
1817
          # build a list of nodes for this os containing empty lists
1818
          # for each node in node_list
1819
          all_os[os_obj.name] = {}
1820
          for nname in good_nodes:
1821
            all_os[os_obj.name][nname] = []
1822
        all_os[os_obj.name][node_name].append(os_obj)
1823
    return all_os
1824

    
1825
  def Exec(self, feedback_fn):
1826
    """Compute the list of OSes.
1827

1828
    """
1829
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1830
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1831
    if node_data == False:
1832
      raise errors.OpExecError("Can't gather the list of OSes")
1833
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1834
    output = []
1835
    for os_name, os_data in pol.iteritems():
1836
      row = []
1837
      for field in self.op.output_fields:
1838
        if field == "name":
1839
          val = os_name
1840
        elif field == "valid":
1841
          val = utils.all([osl and osl[0] for osl in os_data.values()])
1842
        elif field == "node_status":
1843
          val = {}
1844
          for node_name, nos_list in os_data.iteritems():
1845
            val[node_name] = [(v.status, v.path) for v in nos_list]
1846
        else:
1847
          raise errors.ParameterError(field)
1848
        row.append(val)
1849
      output.append(row)
1850

    
1851
    return output
1852

    
1853

    
1854
class LURemoveNode(LogicalUnit):
1855
  """Logical unit for removing a node.
1856

1857
  """
1858
  HPATH = "node-remove"
1859
  HTYPE = constants.HTYPE_NODE
1860
  _OP_REQP = ["node_name"]
1861

    
1862
  def BuildHooksEnv(self):
1863
    """Build hooks env.
1864

1865
    This doesn't run on the target node in the pre phase as a failed
1866
    node would then be impossible to remove.
1867

1868
    """
1869
    env = {
1870
      "OP_TARGET": self.op.node_name,
1871
      "NODE_NAME": self.op.node_name,
1872
      }
1873
    all_nodes = self.cfg.GetNodeList()
1874
    all_nodes.remove(self.op.node_name)
1875
    return env, all_nodes, all_nodes
1876

    
1877
  def CheckPrereq(self):
1878
    """Check prerequisites.
1879

1880
    This checks:
1881
     - the node exists in the configuration
1882
     - it does not have primary or secondary instances
1883
     - it's not the master
1884

1885
    Any errors are signalled by raising errors.OpPrereqError.
1886

1887
    """
1888
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1889
    if node is None:
1890
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1891

    
1892
    instance_list = self.cfg.GetInstanceList()
1893

    
1894
    masternode = self.cfg.GetMasterNode()
1895
    if node.name == masternode:
1896
      raise errors.OpPrereqError("Node is the master node,"
1897
                                 " you need to failover first.")
1898

    
1899
    for instance_name in instance_list:
1900
      instance = self.cfg.GetInstanceInfo(instance_name)
1901
      if node.name in instance.all_nodes:
1902
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1903
                                   " please remove first." % instance_name)
1904
    self.op.node_name = node.name
1905
    self.node = node
1906

    
1907
  def Exec(self, feedback_fn):
1908
    """Removes the node from the cluster.
1909

1910
    """
1911
    node = self.node
1912
    logging.info("Stopping the node daemon and removing configs from node %s",
1913
                 node.name)
1914

    
1915
    self.context.RemoveNode(node.name)
1916

    
1917
    self.rpc.call_node_leave_cluster(node.name)
1918

    
1919
    # Promote nodes to master candidate as needed
1920
    _AdjustCandidatePool(self)
1921

    
1922

    
1923
class LUQueryNodes(NoHooksLU):
1924
  """Logical unit for querying nodes.
1925

1926
  """
1927
  _OP_REQP = ["output_fields", "names", "use_locking"]
1928
  REQ_BGL = False
1929
  _FIELDS_DYNAMIC = utils.FieldSet(
1930
    "dtotal", "dfree",
1931
    "mtotal", "mnode", "mfree",
1932
    "bootid",
1933
    "ctotal", "cnodes", "csockets",
1934
    )
1935

    
1936
  _FIELDS_STATIC = utils.FieldSet(
1937
    "name", "pinst_cnt", "sinst_cnt",
1938
    "pinst_list", "sinst_list",
1939
    "pip", "sip", "tags",
1940
    "serial_no",
1941
    "master_candidate",
1942
    "master",
1943
    "offline",
1944
    "drained",
1945
    )
1946

    
1947
  def ExpandNames(self):
1948
    _CheckOutputFields(static=self._FIELDS_STATIC,
1949
                       dynamic=self._FIELDS_DYNAMIC,
1950
                       selected=self.op.output_fields)
1951

    
1952
    self.needed_locks = {}
1953
    self.share_locks[locking.LEVEL_NODE] = 1
1954

    
1955
    if self.op.names:
1956
      self.wanted = _GetWantedNodes(self, self.op.names)
1957
    else:
1958
      self.wanted = locking.ALL_SET
1959

    
1960
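    # live (dynamic) fields require querying the nodes themselves; node
    # locking is then only used if the caller explicitly requested it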
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1961
    self.do_locking = self.do_node_query and self.op.use_locking
1962
    if self.do_locking:
1963
      # if we don't request only static fields, we need to lock the nodes
1964
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1965

    
1966

    
1967
  def CheckPrereq(self):
1968
    """Check prerequisites.
1969

1970
    """
1971
    # The validation of the node list is done in _GetWantedNodes if it is
1972
    # non-empty; if it is empty, there is no validation to do
1973
    pass
1974

    
1975
  def Exec(self, feedback_fn):
1976
    """Computes the list of nodes and their attributes.
1977

1978
    """
1979
    all_info = self.cfg.GetAllNodesInfo()
1980
    if self.do_locking:
1981
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
1982
    elif self.wanted != locking.ALL_SET:
1983
      nodenames = self.wanted
1984
      missing = set(nodenames).difference(all_info.keys())
1985
      if missing:
1986
        raise errors.OpExecError(
1987
          "Some nodes were removed before retrieving their data: %s" % missing)
1988
    else:
1989
      nodenames = all_info.keys()
1990

    
1991
    nodenames = utils.NiceSort(nodenames)
1992
    nodelist = [all_info[name] for name in nodenames]
1993

    
1994
    # begin data gathering
1995

    
1996
    if self.do_node_query:
1997
      live_data = {}
1998
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
1999
                                          self.cfg.GetHypervisorType())
2000
      for name in nodenames:
2001
        nodeinfo = node_data[name]
2002
        if not nodeinfo.failed and nodeinfo.data:
2003
          nodeinfo = nodeinfo.data
2004
          fn = utils.TryConvert
2005
          live_data[name] = {
2006
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2007
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2008
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2009
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2010
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2011
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2012
            "bootid": nodeinfo.get('bootid', None),
2013
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2014
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2015
            }
2016
        else:
2017
          live_data[name] = {}
2018
    else:
2019
      live_data = dict.fromkeys(nodenames, {})
2020

    
2021
    node_to_primary = dict([(name, set()) for name in nodenames])
2022
    node_to_secondary = dict([(name, set()) for name in nodenames])
2023

    
2024
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2025
                             "sinst_cnt", "sinst_list"))
2026
    if inst_fields & frozenset(self.op.output_fields):
2027
      instancelist = self.cfg.GetInstanceList()
2028

    
2029
      for instance_name in instancelist:
2030
        inst = self.cfg.GetInstanceInfo(instance_name)
2031
        if inst.primary_node in node_to_primary:
2032
          node_to_primary[inst.primary_node].add(inst.name)
2033
        for secnode in inst.secondary_nodes:
2034
          if secnode in node_to_secondary:
2035
            node_to_secondary[secnode].add(inst.name)
2036

    
2037
    master_node = self.cfg.GetMasterNode()
2038

    
2039
    # end data gathering
2040

    
2041
    output = []
2042
    for node in nodelist:
2043
      node_output = []
2044
      for field in self.op.output_fields:
2045
        if field == "name":
2046
          val = node.name
2047
        elif field == "pinst_list":
2048
          val = list(node_to_primary[node.name])
2049
        elif field == "sinst_list":
2050
          val = list(node_to_secondary[node.name])
2051
        elif field == "pinst_cnt":
2052
          val = len(node_to_primary[node.name])
2053
        elif field == "sinst_cnt":
2054
          val = len(node_to_secondary[node.name])
2055
        elif field == "pip":
2056
          val = node.primary_ip
2057
        elif field == "sip":
2058
          val = node.secondary_ip
2059
        elif field == "tags":
2060
          val = list(node.GetTags())
2061
        elif field == "serial_no":
2062
          val = node.serial_no
2063
        elif field == "master_candidate":
2064
          val = node.master_candidate
2065
        elif field == "master":
2066
          val = node.name == master_node
2067
        elif field == "offline":
2068
          val = node.offline
2069
        elif field == "drained":
2070
          val = node.drained
2071
        elif self._FIELDS_DYNAMIC.Matches(field):
2072
          val = live_data[node.name].get(field, None)
2073
        else:
2074
          raise errors.ParameterError(field)
2075
        node_output.append(val)
2076
      output.append(node_output)
2077

    
2078
    return output
2079

    
2080

    
2081
class LUQueryNodeVolumes(NoHooksLU):
2082
  """Logical unit for getting volumes on node(s).
2083

2084
  """
2085
  _OP_REQP = ["nodes", "output_fields"]
2086
  REQ_BGL = False
2087
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2088
  _FIELDS_STATIC = utils.FieldSet("node")
2089

    
2090
  def ExpandNames(self):
2091
    _CheckOutputFields(static=self._FIELDS_STATIC,
2092
                       dynamic=self._FIELDS_DYNAMIC,
2093
                       selected=self.op.output_fields)
2094

    
2095
    self.needed_locks = {}
2096
    self.share_locks[locking.LEVEL_NODE] = 1
2097
    if not self.op.nodes:
2098
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2099
    else:
2100
      self.needed_locks[locking.LEVEL_NODE] = \
2101
        _GetWantedNodes(self, self.op.nodes)
2102

    
2103
  def CheckPrereq(self):
2104
    """Check prerequisites.
2105

2106
    This checks that the fields required are valid output fields.
2107

2108
    """
2109
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2110

    
2111
  def Exec(self, feedback_fn):
2112
    """Computes the list of nodes and their attributes.
2113

2114
    """
2115
    nodenames = self.nodes
2116
    volumes = self.rpc.call_node_volumes(nodenames)
2117

    
2118
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2119
             in self.cfg.GetInstanceList()]
2120

    
2121
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2122

    
2123
    output = []
2124
    for node in nodenames:
2125
      if node not in volumes or volumes[node].failed or not volumes[node].data:
2126
        continue
2127

    
2128
      node_vols = volumes[node].data[:]
2129
      node_vols.sort(key=lambda vol: vol['dev'])
2130

    
2131
      for vol in node_vols:
2132
        node_output = []
2133
        for field in self.op.output_fields:
2134
          if field == "node":
2135
            val = node
2136
          elif field == "phys":
2137
            val = vol['dev']
2138
          elif field == "vg":
2139
            val = vol['vg']
2140
          elif field == "name":
2141
            val = vol['name']
2142
          elif field == "size":
2143
            val = int(float(vol['size']))
2144
          elif field == "instance":
2145
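            # the for/else below yields '-' when no instance owns this
            # logical volume on this node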
            for inst in ilist:
2146
              if node not in lv_by_node[inst]:
2147
                continue
2148
              if vol['name'] in lv_by_node[inst][node]:
2149
                val = inst.name
2150
                break
2151
            else:
2152
              val = '-'
2153
          else:
2154
            raise errors.ParameterError(field)
2155
          node_output.append(str(val))
2156

    
2157
        output.append(node_output)
2158

    
2159
    return output
2160

    
2161

    
2162
class LUAddNode(LogicalUnit):
2163
  """Logical unit for adding node to the cluster.
2164

2165
  """
2166
  HPATH = "node-add"
2167
  HTYPE = constants.HTYPE_NODE
2168
  _OP_REQP = ["node_name"]
2169

    
2170
  def BuildHooksEnv(self):
2171
    """Build hooks env.
2172

2173
    This will run on all nodes before, and on all nodes + the new node after.
2174

2175
    """
2176
    env = {
2177
      "OP_TARGET": self.op.node_name,
2178
      "NODE_NAME": self.op.node_name,
2179
      "NODE_PIP": self.op.primary_ip,
2180
      "NODE_SIP": self.op.secondary_ip,
2181
      }
2182
    nodes_0 = self.cfg.GetNodeList()
2183
    nodes_1 = nodes_0 + [self.op.node_name, ]
2184
    return env, nodes_0, nodes_1
2185

    
2186
  def CheckPrereq(self):
2187
    """Check prerequisites.
2188

2189
    This checks:
2190
     - the new node is not already in the config
2191
     - it is resolvable
2192
     - its parameters (single/dual homed) match the cluster
2193

2194
    Any errors are signalled by raising errors.OpPrereqError.
2195

2196
    """
2197
    node_name = self.op.node_name
2198
    cfg = self.cfg
2199

    
2200
    dns_data = utils.HostInfo(node_name)
2201

    
2202
    node = dns_data.name
2203
    primary_ip = self.op.primary_ip = dns_data.ip
2204
    secondary_ip = getattr(self.op, "secondary_ip", None)
2205
    if secondary_ip is None:
2206
      secondary_ip = primary_ip
2207
    if not utils.IsValidIP(secondary_ip):
2208
      raise errors.OpPrereqError("Invalid secondary IP given")
2209
    self.op.secondary_ip = secondary_ip
2210

    
2211
    node_list = cfg.GetNodeList()
2212
    if not self.op.readd and node in node_list:
2213
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2214
                                 node)
2215
    elif self.op.readd and node not in node_list:
2216
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2217

    
2218
    for existing_node_name in node_list:
2219
      existing_node = cfg.GetNodeInfo(existing_node_name)
2220

    
2221
      if self.op.readd and node == existing_node_name:
2222
        if (existing_node.primary_ip != primary_ip or
2223
            existing_node.secondary_ip != secondary_ip):
2224
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2225
                                     " address configuration as before")
2226
        continue
2227

    
2228
      if (existing_node.primary_ip == primary_ip or
2229
          existing_node.secondary_ip == primary_ip or
2230
          existing_node.primary_ip == secondary_ip or
2231
          existing_node.secondary_ip == secondary_ip):
2232
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2233
                                   " existing node %s" % existing_node.name)
2234

    
2235
    # check that the type of the node (single versus dual homed) is the
2236
    # same as for the master
2237
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2238
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2239
    newbie_singlehomed = secondary_ip == primary_ip
2240
    if master_singlehomed != newbie_singlehomed:
2241
      if master_singlehomed:
2242
        raise errors.OpPrereqError("The master has no private ip but the"
2243
                                   " new node has one")
2244
      else:
2245
        raise errors.OpPrereqError("The master has a private ip but the"
2246
                                   " new node doesn't have one")
2247

    
2248
    # check reachability
2249
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2250
      raise errors.OpPrereqError("Node not reachable by ping")
2251

    
2252
    if not newbie_singlehomed:
2253
      # check reachability from my secondary ip to newbie's secondary ip
2254
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2255
                           source=myself.secondary_ip):
2256
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2257
                                   " based ping to noded port")
2258

    
2259
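    # a newly added node is automatically promoted to master candidate
    # while the current number of candidates is below the configured
    # candidate pool size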
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2260
    mc_now, _ = self.cfg.GetMasterCandidateStats()
2261
    master_candidate = mc_now < cp_size
2262

    
2263
    self.new_node = objects.Node(name=node,
2264
                                 primary_ip=primary_ip,
2265
                                 secondary_ip=secondary_ip,
2266
                                 master_candidate=master_candidate,
2267
                                 offline=False, drained=False)
2268

    
2269
  def Exec(self, feedback_fn):
2270
    """Adds the new node to the cluster.
2271

2272
    """
2273
    new_node = self.new_node
2274
    node = new_node.name
2275

    
2276
    # check connectivity
2277
    result = self.rpc.call_version([node])[node]
2278
    result.Raise()
2279
    if result.data:
2280
      if constants.PROTOCOL_VERSION == result.data:
2281
        logging.info("Communication to node %s fine, sw version %s match",
2282
                     node, result.data)
2283
      else:
2284
        raise errors.OpExecError("Version mismatch master version %s,"
2285
                                 " node version %s" %
2286
                                 (constants.PROTOCOL_VERSION, result.data))
2287
    else:
2288
      raise errors.OpExecError("Cannot get version from the new node")
2289

    
2290
    # setup ssh on node
2291
    logging.info("Copy ssh key to node %s", node)
2292
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2293
    keyarray = []
2294
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2295
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2296
                priv_key, pub_key]
2297

    
2298
    for i in keyfiles:
2299
      f = open(i, 'r')
2300
      try:
2301
        keyarray.append(f.read())
2302
      finally:
2303
        f.close()
2304

    
2305
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2306
                                    keyarray[2],
2307
                                    keyarray[3], keyarray[4], keyarray[5])
2308

    
2309
    msg = result.RemoteFailMsg()
2310
    if msg:
2311
      raise errors.OpExecError("Cannot transfer ssh keys to the"
2312
                               " new node: %s" % msg)
2313

    
2314
    # Add node to our /etc/hosts, and add key to known_hosts
2315
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2316
      utils.AddHostToEtcHosts(new_node.name)
2317

    
2318
    if new_node.secondary_ip != new_node.primary_ip:
2319
      result = self.rpc.call_node_has_ip_address(new_node.name,
2320
                                                 new_node.secondary_ip)
2321
      msg = result.RemoteFailMsg()
2322
      if msg:
2323
        raise errors.OpPrereqError("Failure checking secondary ip"
2324
                                   " on node %s: %s" % (new_node.name, msg))
2325
      if not result.payload:
2326
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2327
                                 " you gave (%s). Please fix and re-run this"
2328
                                 " command." % new_node.secondary_ip)
2329

    
2330
    node_verify_list = [self.cfg.GetMasterNode()]
2331
    node_verify_param = {
2332
      'nodelist': [node],
2333
      # TODO: do a node-net-test as well?
2334
    }
2335

    
2336
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2337
                                       self.cfg.GetClusterName())
2338
    for verifier in node_verify_list:
2339
      if result[verifier].failed or not result[verifier].data:
2340
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
2341
                                 " for remote verification" % verifier)
2342
      if result[verifier].data['nodelist']:
2343
        for failed in result[verifier].data['nodelist']:
2344
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2345
                      (verifier, result[verifier].data['nodelist'][failed]))
2346
        raise errors.OpExecError("ssh/hostname verification failed.")
2347

    
2348
    if self.op.readd:
2349
      _RedistributeAncillaryFiles(self)
2350
      self.context.ReaddNode(new_node)
2351
    else:
2352
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2353
      self.context.AddNode(new_node)
2354

    
2355

    
2356
class LUSetNodeParams(LogicalUnit):
2357
  """Modifies the parameters of a node.
2358

2359
  """
2360
  HPATH = "node-modify"
2361
  HTYPE = constants.HTYPE_NODE
2362
  _OP_REQP = ["node_name"]
2363
  REQ_BGL = False
2364

    
2365
  def CheckArguments(self):
2366
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2367
    if node_name is None:
2368
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2369
    self.op.node_name = node_name
2370
    _CheckBooleanOpField(self.op, 'master_candidate')
2371
    _CheckBooleanOpField(self.op, 'offline')
2372
    _CheckBooleanOpField(self.op, 'drained')
2373
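    # at least one of the three flags must be given, and at most one may
    # be set to True, since the resulting states are mutually exclusive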
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2374
    if all_mods.count(None) == 3:
2375
      raise errors.OpPrereqError("Please pass at least one modification")
2376
    if all_mods.count(True) > 1:
2377
      raise errors.OpPrereqError("Can't set the node into more than one"
2378
                                 " state at the same time")
2379

    
2380
  def ExpandNames(self):
2381
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2382

    
2383
  def BuildHooksEnv(self):
2384
    """Build hooks env.
2385

2386
    This runs on the master node.
2387

2388
    """
2389
    env = {
2390
      "OP_TARGET": self.op.node_name,
2391
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2392
      "OFFLINE": str(self.op.offline),
2393
      "DRAINED": str(self.op.drained),
2394
      }
2395
    nl = [self.cfg.GetMasterNode(),
2396
          self.op.node_name]
2397
    return env, nl, nl
2398

    
2399
  def CheckPrereq(self):
2400
    """Check prerequisites.
2401

2402
    This only checks the instance list against the existing names.
2403

2404
    """
2405
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2406

    
2407
    if ((self.op.master_candidate == False or self.op.offline == True or
2408
         self.op.drained == True) and node.master_candidate):
2409
      # we will demote the node from master_candidate
2410
      if self.op.node_name == self.cfg.GetMasterNode():
2411
        raise errors.OpPrereqError("The master node has to be a"
2412
                                   " master candidate, online and not drained")
2413
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2414
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2415
      if num_candidates <= cp_size:
2416
        msg = ("Not enough master candidates (desired"
2417
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2418
        if self.op.force:
2419
          self.LogWarning(msg)
2420
        else:
2421
          raise errors.OpPrereqError(msg)
2422

    
2423
    if (self.op.master_candidate == True and
2424
        ((node.offline and not self.op.offline == False) or
2425
         (node.drained and not self.op.drained == False))):
2426
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2427
                                 " to master_candidate" % node.name)
2428

    
2429
    return
2430

    
2431
  def Exec(self, feedback_fn):
2432
    """Modifies a node.
2433

2434
    """
2435
    node = self.node
2436

    
2437
    result = []
2438
    changed_mc = False
2439

    
2440
    if self.op.offline is not None:
2441
      node.offline = self.op.offline
2442
      result.append(("offline", str(self.op.offline)))
2443
      if self.op.offline == True:
2444
        if node.master_candidate:
2445
          node.master_candidate = False
2446
          changed_mc = True
2447
          result.append(("master_candidate", "auto-demotion due to offline"))
2448
        if node.drained:
2449
          node.drained = False
2450
          result.append(("drained", "clear drained status due to offline"))
2451

    
2452
    if self.op.master_candidate is not None:
2453
      node.master_candidate = self.op.master_candidate
2454
      changed_mc = True
2455
      result.append(("master_candidate", str(self.op.master_candidate)))
2456
      if self.op.master_candidate == False:
2457
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2458
        msg = rrc.RemoteFailMsg()
2459
        if msg:
2460
          self.LogWarning("Node failed to demote itself: %s" % msg)
2461

    
2462
    if self.op.drained is not None:
2463
      node.drained = self.op.drained
2464
      result.append(("drained", str(self.op.drained)))
2465
      if self.op.drained == True:
2466
        if node.master_candidate:
2467
          node.master_candidate = False
2468
          changed_mc = True
2469
          result.append(("master_candidate", "auto-demotion due to drain"))
2470
        if node.offline:
2471
          node.offline = False
2472
          result.append(("offline", "clear offline status due to drain"))
2473

    
2474
    # this will trigger configuration file update, if needed
2475
    self.cfg.Update(node)
2476
    # this will trigger job queue propagation or cleanup
2477
    if changed_mc:
2478
      self.context.ReaddNode(node)
2479

    
2480
    return result
2481

    
2482

    
2483
class LUPowercycleNode(NoHooksLU):
2484
  """Powercycles a node.
2485

2486
  """
2487
  _OP_REQP = ["node_name", "force"]
2488
  REQ_BGL = False
2489

    
2490
  def CheckArguments(self):
2491
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2492
    if node_name is None:
2493
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2494
    self.op.node_name = node_name
2495
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2496
      raise errors.OpPrereqError("The node is the master and the force"
2497
                                 " parameter was not set")
2498

    
2499
  def ExpandNames(self):
2500
    """Locking for PowercycleNode.
2501

2502
    This is a last-resort option and shouldn't block on other
2503
    jobs. Therefore, we grab no locks.
2504

2505
    """
2506
    self.needed_locks = {}
2507

    
2508
  def CheckPrereq(self):
2509
    """Check prerequisites.
2510

2511
    This LU has no prereqs.
2512

2513
    """
2514
    pass
2515

    
2516
  def Exec(self, feedback_fn):
2517
    """Reboots a node.
2518

2519
    """
2520
    result = self.rpc.call_node_powercycle(self.op.node_name,
2521
                                           self.cfg.GetHypervisorType())
2522
    msg = result.RemoteFailMsg()
2523
    if msg:
2524
      raise errors.OpExecError("Failed to schedule the reboot: %s" % msg)
2525
    return result.payload
2526

    
2527

    
2528
class LUQueryClusterInfo(NoHooksLU):
2529
  """Query cluster configuration.
2530

2531
  """
2532
  _OP_REQP = []
2533
  REQ_BGL = False
2534

    
2535
  def ExpandNames(self):
2536
    self.needed_locks = {}
2537

    
2538
  def CheckPrereq(self):
2539
    """No prerequsites needed for this LU.
2540

2541
    """
2542
    pass
2543

    
2544
  def Exec(self, feedback_fn):
2545
    """Return cluster config.
2546

2547
    """
2548
    cluster = self.cfg.GetClusterInfo()
2549
    result = {
2550
      "software_version": constants.RELEASE_VERSION,
2551
      "protocol_version": constants.PROTOCOL_VERSION,
2552
      "config_version": constants.CONFIG_VERSION,
2553
      "os_api_version": constants.OS_API_VERSION,
2554
      "export_version": constants.EXPORT_VERSION,
2555
      "architecture": (platform.architecture()[0], platform.machine()),
2556
      "name": cluster.cluster_name,
2557
      "master": cluster.master_node,
2558
      "default_hypervisor": cluster.default_hypervisor,
2559
      "enabled_hypervisors": cluster.enabled_hypervisors,
2560
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
2561
                        for hypervisor in cluster.enabled_hypervisors]),
2562
      "beparams": cluster.beparams,
2563
      "nicparams": cluster.nicparams,
2564
      "candidate_pool_size": cluster.candidate_pool_size,
2565
      "master_netdev": cluster.master_netdev,
2566
      "volume_group_name": cluster.volume_group_name,
2567
      "file_storage_dir": cluster.file_storage_dir,
2568
      }
2569

    
2570
    return result
2571

    
2572

    
2573
class LUQueryConfigValues(NoHooksLU):
2574
  """Return configuration values.
2575

2576
  """
2577
  _OP_REQP = []
2578
  REQ_BGL = False
2579
  _FIELDS_DYNAMIC = utils.FieldSet()
2580
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2581

    
2582
  def ExpandNames(self):
2583
    self.needed_locks = {}
2584

    
2585
    _CheckOutputFields(static=self._FIELDS_STATIC,
2586
                       dynamic=self._FIELDS_DYNAMIC,
2587
                       selected=self.op.output_fields)
2588

    
2589
  def CheckPrereq(self):
2590
    """No prerequisites.
2591

2592
    """
2593
    pass
2594

    
2595
  def Exec(self, feedback_fn):
2596
    """Dump a representation of the cluster config to the standard output.
2597

2598
    """
2599
    values = []
2600
    for field in self.op.output_fields:
2601
      if field == "cluster_name":
2602
        entry = self.cfg.GetClusterName()
2603
      elif field == "master_node":
2604
        entry = self.cfg.GetMasterNode()
2605
      elif field == "drain_flag":
2606
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2607
      else:
2608
        raise errors.ParameterError(field)
2609
      values.append(entry)
2610
    return values
2611

    
2612

    
2613
class LUActivateInstanceDisks(NoHooksLU):
2614
  """Bring up an instance's disks.
2615

2616
  """
2617
  _OP_REQP = ["instance_name"]
2618
  REQ_BGL = False
2619

    
2620
  def ExpandNames(self):
2621
    self._ExpandAndLockInstance()
2622
    self.needed_locks[locking.LEVEL_NODE] = []
2623
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2624

    
2625
  def DeclareLocks(self, level):
2626
    if level == locking.LEVEL_NODE:
2627
      self._LockInstancesNodes()
2628

    
2629
  def CheckPrereq(self):
2630
    """Check prerequisites.
2631

2632
    This checks that the instance is in the cluster.
2633

2634
    """
2635
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2636
    assert self.instance is not None, \
2637
      "Cannot retrieve locked instance %s" % self.op.instance_name
2638
    _CheckNodeOnline(self, self.instance.primary_node)
2639

    
2640
  def Exec(self, feedback_fn):
2641
    """Activate the disks.
2642

2643
    """
2644
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2645
    if not disks_ok:
2646
      raise errors.OpExecError("Cannot activate block devices")
2647

    
2648
    return disks_info
2649

    
2650

    
2651
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2652
  """Prepare the block devices for an instance.
2653

2654
  This sets up the block devices on all nodes.
2655

2656
  @type lu: L{LogicalUnit}
2657
  @param lu: the logical unit on whose behalf we execute
2658
  @type instance: L{objects.Instance}
2659
  @param instance: the instance for whose disks we assemble
2660
  @type ignore_secondaries: boolean
2661
  @param ignore_secondaries: if true, errors on secondary nodes
2662
      won't result in an error return from the function
2663
  @return: a tuple of (disks_ok, device_info), where device_info is a list
2664
      of (host, instance_visible_name, node_visible_name) tuples
2665
      with the mapping from node devices to instance devices
2666

2667
  """
2668
  device_info = []
2669
  disks_ok = True
2670
  iname = instance.name
2671
  # With the two-pass mechanism we try to reduce the window of
2672
  # opportunity for the race condition of switching DRBD to primary
2673
  # before handshaking occurred, but we do not eliminate it
2674

    
2675
  # The proper fix would be to wait (with some limits) until the
2676
  # connection has been made and drbd transitions from WFConnection
2677
  # into any other network-connected state (Connected, SyncTarget,
2678
  # SyncSource, etc.)
2679

    
2680
  # 1st pass, assemble on all nodes in secondary mode
2681
  for inst_disk in instance.disks:
2682
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2683
      lu.cfg.SetDiskID(node_disk, node)
2684
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2685
      msg = result.RemoteFailMsg()
2686
      if msg:
2687
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2688
                           " (is_primary=False, pass=1): %s",
2689
                           inst_disk.iv_name, node, msg)
2690
        if not ignore_secondaries:
2691
          disks_ok = False
2692

    
2693
  # FIXME: race condition on drbd migration to primary
2694

    
2695
  # 2nd pass, do only the primary node
2696
  for inst_disk in instance.disks:
2697
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2698
      if node != instance.primary_node:
2699
        continue
2700
      lu.cfg.SetDiskID(node_disk, node)
2701
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2702
      msg = result.RemoteFailMsg()
2703
      if msg:
2704
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2705
                           " (is_primary=True, pass=2): %s",
2706
                           inst_disk.iv_name, node, msg)
2707
        disks_ok = False
2708
    device_info.append((instance.primary_node, inst_disk.iv_name,
2709
                        result.payload))
2710

    
2711
  # leave the disks configured for the primary node
2712
  # this is a workaround that would be fixed better by
2713
  # improving the logical/physical id handling
2714
  for disk in instance.disks:
2715
    lu.cfg.SetDiskID(disk, instance.primary_node)
2716

    
2717
  return disks_ok, device_info
2718

    
2719

    
2720
def _StartInstanceDisks(lu, instance, force):
2721
  """Start the disks of an instance.
2722

2723
  """
2724
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2725
                                           ignore_secondaries=force)
2726
  if not disks_ok:
2727
    _ShutdownInstanceDisks(lu, instance)
2728
    if force is not None and not force:
2729
      lu.proc.LogWarning("", hint="If the message above refers to a"
2730
                         " secondary node,"
2731
                         " you can retry the operation using '--force'.")
2732
    raise errors.OpExecError("Disk consistency error")
2733

    
2734

    
2735
class LUDeactivateInstanceDisks(NoHooksLU):
2736
  """Shutdown an instance's disks.
2737

2738
  """
2739
  _OP_REQP = ["instance_name"]
2740
  REQ_BGL = False
2741

    
2742
  def ExpandNames(self):
2743
    self._ExpandAndLockInstance()
2744
    self.needed_locks[locking.LEVEL_NODE] = []
2745
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2746

    
2747
  def DeclareLocks(self, level):
2748
    if level == locking.LEVEL_NODE:
2749
      self._LockInstancesNodes()
2750

    
2751
  def CheckPrereq(self):
2752
    """Check prerequisites.
2753

2754
    This checks that the instance is in the cluster.
2755

2756
    """
2757
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2758
    assert self.instance is not None, \
2759
      "Cannot retrieve locked instance %s" % self.op.instance_name
2760

    
2761
  def Exec(self, feedback_fn):
2762
    """Deactivate the disks
2763

2764
    """
2765
    instance = self.instance
2766
    _SafeShutdownInstanceDisks(self, instance)
2767

    
2768

    
2769
def _SafeShutdownInstanceDisks(lu, instance):
2770
  """Shutdown block devices of an instance.
2771

2772
  This function checks if an instance is running, before calling
2773
  _ShutdownInstanceDisks.
2774

2775
  """
2776
  pnode = instance.primary_node
2777
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])
2778
  ins_l = ins_l[pnode]
2779
  msg = ins_l.RemoteFailMsg()
2780
  if msg:
2781
    raise errors.OpExecError("Can't contact node %s: %s" % (pnode, msg))
2782

    
2783
  if instance.name in ins_l.payload:
2784
    raise errors.OpExecError("Instance is running, can't shutdown"
2785
                             " block devices.")
2786

    
2787
  _ShutdownInstanceDisks(lu, instance)
2788

    
2789

    
2790
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2791
  """Shutdown block devices of an instance.
2792

2793
  This does the shutdown on all nodes of the instance.
2794

2795
  If ignore_primary is true, errors on the primary node are
2796
  ignored.
2797

2798
  """
2799
  all_result = True
2800
  for disk in instance.disks:
2801
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2802
      lu.cfg.SetDiskID(top_disk, node)
2803
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2804
      msg = result.RemoteFailMsg()
2805
      if msg:
2806
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2807
                      disk.iv_name, node, msg)
2808
        if not ignore_primary or node != instance.primary_node:
2809
          all_result = False
2810
  return all_result
2811

    
2812

    
2813
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2814
  """Checks if a node has enough free memory.
2815

2816
  This function check if a given node has the needed amount of free
2817
  memory. In case the node has less memory or we cannot get the
2818
  information from the node, this function raise an OpPrereqError
2819
  exception.
2820

2821
  @type lu: C{LogicalUnit}
2822
  @param lu: a logical unit from which we get configuration data
2823
  @type node: C{str}
2824
  @param node: the node to check
2825
  @type reason: C{str}
2826
  @param reason: string to use in the error message
2827
  @type requested: C{int}
2828
  @param requested: the amount of memory in MiB to check for
2829
  @type hypervisor_name: C{str}
2830
  @param hypervisor_name: the hypervisor to ask for memory stats
2831
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2832
      we cannot check the node
2833

2834
  """
2835
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2836
  nodeinfo[node].Raise()
2837
  free_mem = nodeinfo[node].data.get('memory_free')
2838
  if not isinstance(free_mem, int):
2839
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2840
                             " was '%s'" % (node, free_mem))
2841
  if requested > free_mem:
2842
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2843
                             " needed %s MiB, available %s MiB" %
2844
                             (node, reason, requested, free_mem))
2845

    
2846

    
2847
class LUStartupInstance(LogicalUnit):
2848
  """Starts an instance.
2849

2850
  """
2851
  HPATH = "instance-start"
2852
  HTYPE = constants.HTYPE_INSTANCE
2853
  _OP_REQP = ["instance_name", "force"]
2854
  REQ_BGL = False
2855

    
2856
  def ExpandNames(self):
2857
    self._ExpandAndLockInstance()
2858

    
2859
  def BuildHooksEnv(self):
2860
    """Build hooks env.
2861

2862
    This runs on master, primary and secondary nodes of the instance.
2863

2864
    """
2865
    env = {
2866
      "FORCE": self.op.force,
2867
      }
2868
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2869
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2870
    return env, nl, nl
2871

    
2872
  def CheckPrereq(self):
2873
    """Check prerequisites.
2874

2875
    This checks that the instance is in the cluster.
2876

2877
    """
2878
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2879
    assert self.instance is not None, \
2880
      "Cannot retrieve locked instance %s" % self.op.instance_name
2881

    
2882
    # extra beparams
2883
    self.beparams = getattr(self.op, "beparams", {})
2884
    if self.beparams:
2885
      if not isinstance(self.beparams, dict):
2886
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
2887
                                   " dict" % (type(self.beparams), ))
2888
      # fill the beparams dict
2889
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
2890
      self.op.beparams = self.beparams
2891

    
2892
    # extra hvparams
2893
    self.hvparams = getattr(self.op, "hvparams", {})
2894
    if self.hvparams:
2895
      if not isinstance(self.hvparams, dict):
2896
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
2897
                                   " dict" % (type(self.hvparams), ))
2898

    
2899
      # check hypervisor parameter syntax (locally)
2900
      cluster = self.cfg.GetClusterInfo()
2901
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
2902
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
2903
                                    instance.hvparams)
2904
      filled_hvp.update(self.hvparams)
2905
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
2906
      hv_type.CheckParameterSyntax(filled_hvp)
2907
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
2908
      self.op.hvparams = self.hvparams
2909

    
2910
    _CheckNodeOnline(self, instance.primary_node)
2911

    
2912
    bep = self.cfg.GetClusterInfo().FillBE(instance)
2913
    # check bridges existence
2914
    _CheckInstanceBridgesExist(self, instance)
2915

    
2916
    remote_info = self.rpc.call_instance_info(instance.primary_node,
2917
                                              instance.name,
2918
                                              instance.hypervisor)
2919
    msg = remote_info.RemoteFailMsg()
2920
    if msg:
2921
      raise errors.OpPrereqError("Error checking node %s: %s" %
2922
                                 (instance.primary_node, msg))
2923
    if not remote_info.payload: # not running already
2924
      _CheckNodeFreeMemory(self, instance.primary_node,
2925
                           "starting instance %s" % instance.name,
2926
                           bep[constants.BE_MEMORY], instance.hypervisor)
2927

    
2928
  def Exec(self, feedback_fn):
2929
    """Start the instance.
2930

2931
    """
2932
    instance = self.instance
2933
    force = self.op.force
2934

    
2935
    self.cfg.MarkInstanceUp(instance.name)
2936

    
2937
    node_current = instance.primary_node
2938

    
2939
    _StartInstanceDisks(self, instance, force)
2940

    
2941
    result = self.rpc.call_instance_start(node_current, instance,
2942
                                          self.hvparams, self.beparams)
2943
    msg = result.RemoteFailMsg()
2944
    if msg:
2945
      _ShutdownInstanceDisks(self, instance)
2946
      raise errors.OpExecError("Could not start instance: %s" % msg)
2947
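# Illustrative sketch (not part of the original module): this LU is normally
# reached through an opcode submitted to the master daemon; assuming the
# matching opcode class in the opcodes module is OpStartupInstance, a caller
# would build the request roughly as:
#
#   op = opcodes.OpStartupInstance(instance_name="instance1.example.com",
#                                  force=False)
#
# The processor then drives ExpandNames, CheckPrereq and Exec in that order;
# optional beparams/hvparams overrides are picked up via getattr in
# CheckPrereq above.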

    
2948

    
2949
class LURebootInstance(LogicalUnit):
2950
  """Reboot an instance.
2951

2952
  """
2953
  HPATH = "instance-reboot"
2954
  HTYPE = constants.HTYPE_INSTANCE
2955
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2956
  REQ_BGL = False
2957

    
2958
  def ExpandNames(self):
2959
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2960
                                   constants.INSTANCE_REBOOT_HARD,
2961
                                   constants.INSTANCE_REBOOT_FULL]:
2962
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2963
                                  (constants.INSTANCE_REBOOT_SOFT,
2964
                                   constants.INSTANCE_REBOOT_HARD,
2965
                                   constants.INSTANCE_REBOOT_FULL))
2966
    self._ExpandAndLockInstance()
2967

    
2968
  def BuildHooksEnv(self):
2969
    """Build hooks env.
2970

2971
    This runs on master, primary and secondary nodes of the instance.
2972

2973
    """
2974
    env = {
2975
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2976
      "REBOOT_TYPE": self.op.reboot_type,
2977
      }
2978
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2979
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2980
    return env, nl, nl
2981

    
2982
  def CheckPrereq(self):
2983
    """Check prerequisites.
2984

2985
    This checks that the instance is in the cluster.
2986

2987
    """
2988
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2989
    assert self.instance is not None, \
2990
      "Cannot retrieve locked instance %s" % self.op.instance_name
2991

    
2992
    _CheckNodeOnline(self, instance.primary_node)
2993

    
2994
    # check bridges existence
2995
    _CheckInstanceBridgesExist(self, instance)
2996

    
2997
  def Exec(self, feedback_fn):
2998
    """Reboot the instance.
2999

3000
    """
3001
    instance = self.instance
3002
    ignore_secondaries = self.op.ignore_secondaries
3003
    reboot_type = self.op.reboot_type
3004

    
3005
    node_current = instance.primary_node
3006

    
3007
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3008
                       constants.INSTANCE_REBOOT_HARD]:
3009
      for disk in instance.disks:
3010
        self.cfg.SetDiskID(disk, node_current)
3011
      result = self.rpc.call_instance_reboot(node_current, instance,
3012
                                             reboot_type)
3013
      msg = result.RemoteFailMsg()
3014
      if msg:
3015
        raise errors.OpExecError("Could not reboot instance: %s" % msg)
3016
    else:
3017
      result = self.rpc.call_instance_shutdown(node_current, instance)
3018
      msg = result.RemoteFailMsg()
3019
      if msg:
3020
        raise errors.OpExecError("Could not shutdown instance for"
3021
                                 " full reboot: %s" % msg)
3022
      _ShutdownInstanceDisks(self, instance)
3023
      _StartInstanceDisks(self, instance, ignore_secondaries)
3024
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3025
      msg = result.RemoteFailMsg()
3026
      if msg:
3027
        _ShutdownInstanceDisks(self, instance)
3028
        raise errors.OpExecError("Could not start instance for"
3029
                                 " full reboot: %s" % msg)
3030

    
3031
    self.cfg.MarkInstanceUp(instance.name)
3032
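# Note (descriptive comment added for clarity): of the three accepted reboot
# types, INSTANCE_REBOOT_SOFT and INSTANCE_REBOOT_HARD are delegated to the
# node daemon via call_instance_reboot, while INSTANCE_REBOOT_FULL is handled
# entirely by this LU as shutdown, disk deactivation, disk activation and
# start, honoring the ignore_secondaries flag when re-activating the disks.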

    
3033

    
3034
class LUShutdownInstance(LogicalUnit):
3035
  """Shutdown an instance.
3036

3037
  """
3038
  HPATH = "instance-stop"
3039
  HTYPE = constants.HTYPE_INSTANCE
3040
  _OP_REQP = ["instance_name"]
3041
  REQ_BGL = False
3042

    
3043
  def ExpandNames(self):
3044
    self._ExpandAndLockInstance()
3045

    
3046
  def BuildHooksEnv(self):
3047
    """Build hooks env.
3048

3049
    This runs on master, primary and secondary nodes of the instance.
3050

3051
    """
3052
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3053
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3054
    return env, nl, nl
3055

    
3056
  def CheckPrereq(self):
3057
    """Check prerequisites.
3058

3059
    This checks that the instance is in the cluster.
3060

3061
    """
3062
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3063
    assert self.instance is not None, \
3064
      "Cannot retrieve locked instance %s" % self.op.instance_name
3065
    _CheckNodeOnline(self, self.instance.primary_node)
3066

    
3067
  def Exec(self, feedback_fn):
3068
    """Shutdown the instance.
3069

3070
    """
3071
    instance = self.instance
3072
    node_current = instance.primary_node
3073
    self.cfg.MarkInstanceDown(instance.name)
3074
    result = self.rpc.call_instance_shutdown(node_current, instance)
3075
    msg = result.RemoteFailMsg()
3076
    if msg:
3077
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3078

    
3079
    _ShutdownInstanceDisks(self, instance)
3080
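# Note (descriptive comment added for clarity): the instance is marked down in
# the configuration before the shutdown RPC is issued, a failed shutdown is
# only reported as a warning, and the disks are deactivated afterwards in any
# case, so the recorded state reflects the requested rather than the verified
# state of the instance.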

    
3081

    
3082
class LUReinstallInstance(LogicalUnit):
3083
  """Reinstall an instance.
3084

3085
  """
3086
  HPATH = "instance-reinstall"
3087
  HTYPE = constants.HTYPE_INSTANCE
3088
  _OP_REQP = ["instance_name"]
3089
  REQ_BGL = False
3090

    
3091
  def ExpandNames(self):
3092
    self._ExpandAndLockInstance()
3093

    
3094
  def BuildHooksEnv(self):
3095
    """Build hooks env.
3096

3097
    This runs on master, primary and secondary nodes of the instance.
3098

3099
    """
3100
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3101
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3102
    return env, nl, nl
3103

    
3104
  def CheckPrereq(self):
3105
    """Check prerequisites.
3106

3107
    This checks that the instance is in the cluster and is not running.
3108

3109
    """
3110
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3111
    assert instance is not None, \
3112
      "Cannot retrieve locked instance %s" % self.op.instance_name
3113
    _CheckNodeOnline(self, instance.primary_node)
3114

    
3115
    if instance.disk_template == constants.DT_DISKLESS:
3116
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3117
                                 self.op.instance_name)
3118
    if instance.admin_up:
3119
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3120
                                 self.op.instance_name)
3121
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3122
                                              instance.name,
3123
                                              instance.hypervisor)
3124
    msg = remote_info.RemoteFailMsg()
3125
    if msg:
3126
      raise errors.OpPrereqError("Error checking node %s: %s" %
3127
                                 (instance.primary_node, msg))
3128
    if remote_info.payload:
3129
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3130
                                 (self.op.instance_name,
3131
                                  instance.primary_node))
3132

    
3133
    self.op.os_type = getattr(self.op, "os_type", None)
3134
    if self.op.os_type is not None:
3135
      # OS verification
3136
      pnode = self.cfg.GetNodeInfo(
3137
        self.cfg.ExpandNodeName(instance.primary_node))
3138
      if pnode is None:
3139
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3140
                                   instance.primary_node)
3141
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3142
      result.Raise()
3143
      if not isinstance(result.data, objects.OS):
3144
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
3145
                                   " primary node" % self.op.os_type)
3146

    
3147
    self.instance = instance
3148

    
3149
  def Exec(self, feedback_fn):
3150
    """Reinstall the instance.
3151

3152
    """
3153
    inst = self.instance
3154

    
3155
    if self.op.os_type is not None:
3156
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3157
      inst.os = self.op.os_type
3158
      self.cfg.Update(inst)
3159

    
3160
    _StartInstanceDisks(self, inst, None)
3161
    try:
3162
      feedback_fn("Running the instance OS create scripts...")
3163
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3164
      msg = result.RemoteFailMsg()
3165
      if msg:
3166
        raise errors.OpExecError("Could not install OS for instance %s"
3167
                                 " on node %s: %s" %
3168
                                 (inst.name, inst.primary_node, msg))
3169
    finally:
3170
      _ShutdownInstanceDisks(self, inst)
3171
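# Illustrative sketch (not part of the original module): assuming the matching
# opcode class is OpReinstallInstance, a reinstall that also switches the OS
# would be requested roughly as:
#
#   op = opcodes.OpReinstallInstance(instance_name="instance1.example.com",
#                                    os_type="debian-etch")  # example values
#
# os_type is optional (read via getattr above); when given, the OS is checked
# on the primary node before the create scripts are re-run. The instance must
# not be marked up nor actually running on its primary node.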

    
3172

    
3173
class LURenameInstance(LogicalUnit):
3174
  """Rename an instance.
3175

3176
  """
3177
  HPATH = "instance-rename"
3178
  HTYPE = constants.HTYPE_INSTANCE
3179
  _OP_REQP = ["instance_name", "new_name"]
3180

    
3181
  def BuildHooksEnv(self):
3182
    """Build hooks env.
3183

3184
    This runs on master, primary and secondary nodes of the instance.
3185

3186
    """
3187
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3188
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3189
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3190
    return env, nl, nl
3191

    
3192
  def CheckPrereq(self):
3193
    """Check prerequisites.
3194

3195
    This checks that the instance is in the cluster and is not running.
3196

3197
    """
3198
    instance = self.cfg.GetInstanceInfo(
3199
      self.cfg.ExpandInstanceName(self.op.instance_name))
3200
    if instance is None:
3201
      raise errors.OpPrereqError("Instance '%s' not known" %
3202
                                 self.op.instance_name)
3203
    _CheckNodeOnline(self, instance.primary_node)
3204

    
3205
    if instance.admin_up:
3206
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3207
                                 self.op.instance_name)
3208
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3209
                                              instance.name,
3210
                                              instance.hypervisor)
3211
    msg = remote_info.RemoteFailMsg()
3212
    if msg:
3213
      raise errors.OpPrereqError("Error checking node %s: %s" %
3214
                                 (instance.primary_node, msg))
3215
    if remote_info.payload:
3216
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3217
                                 (self.op.instance_name,
3218
                                  instance.primary_node))
3219
    self.instance = instance
3220

    
3221
    # new name verification
3222
    name_info = utils.HostInfo(self.op.new_name)
3223

    
3224
    self.op.new_name = new_name = name_info.name
3225
    instance_list = self.cfg.GetInstanceList()
3226
    if new_name in instance_list:
3227
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3228
                                 new_name)
3229

    
3230
    if not getattr(self.op, "ignore_ip", False):
3231
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3232
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3233
                                   (name_info.ip, new_name))
3234

    
3235

    
3236
  def Exec(self, feedback_fn):
3237
    """Rename the instance.
3238

3239
    """
3240
    inst = self.instance
3241
    old_name = inst.name
3242

    
3243
    if inst.disk_template == constants.DT_FILE:
3244
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3245

    
3246
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3247
    # Change the instance lock. This is definitely safe while we hold the BGL
3248
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3249
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3250

    
3251
    # re-read the instance from the configuration after rename
3252
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3253

    
3254
    if inst.disk_template == constants.DT_FILE:
3255
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3256
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3257
                                                     old_file_storage_dir,
3258
                                                     new_file_storage_dir)
3259
      result.Raise()
3260
      if not result.data:
3261
        raise errors.OpExecError("Could not connect to node '%s' to rename"
3262
                                 " directory '%s' to '%s' (but the instance"
3263
                                 " has been renamed in Ganeti)" % (
3264
                                 inst.primary_node, old_file_storage_dir,
3265
                                 new_file_storage_dir))
3266

    
3267
      if not result.data[0]:
3268
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
3269
                                 " (but the instance has been renamed in"
3270
                                 " Ganeti)" % (old_file_storage_dir,
3271
                                               new_file_storage_dir))
3272

    
3273
    _StartInstanceDisks(self, inst, None)
3274
    try:
3275
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3276
                                                 old_name)
3277
      msg = result.RemoteFailMsg()
3278
      if msg:
3279
        msg = ("Could not run OS rename script for instance %s on node %s"
3280
               " (but the instance has been renamed in Ganeti): %s" %
3281
               (inst.name, inst.primary_node, msg))
3282
        self.proc.LogWarning(msg)
3283
    finally:
3284
      _ShutdownInstanceDisks(self, inst)
3285
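# Note (descriptive comment added for clarity): the rename proceeds in three
# steps -- the configuration entry (and the instance lock) is renamed first,
# the file storage directory is renamed next (file-based instances only), and
# finally the OS rename script is run on the primary node; a failure in that
# last step is only logged, since the cluster-side rename is already
# committed.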

    
3286

    
3287
class LURemoveInstance(LogicalUnit):
3288
  """Remove an instance.
3289

3290
  """
3291
  HPATH = "instance-remove"
3292
  HTYPE = constants.HTYPE_INSTANCE
3293
  _OP_REQP = ["instance_name", "ignore_failures"]
3294
  REQ_BGL = False
3295

    
3296
  def ExpandNames(self):
3297
    self._ExpandAndLockInstance()
3298
    self.needed_locks[locking.LEVEL_NODE] = []
3299
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3300

    
3301
  def DeclareLocks(self, level):
3302
    if level == locking.LEVEL_NODE:
3303
      self._LockInstancesNodes()
3304

    
3305
  def BuildHooksEnv(self):
3306
    """Build hooks env.
3307

3308
    This runs on master, primary and secondary nodes of the instance.
3309

3310
    """
3311
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3312
    nl = [self.cfg.GetMasterNode()]
3313
    return env, nl, nl
3314

    
3315
  def CheckPrereq(self):
3316
    """Check prerequisites.
3317

3318
    This checks that the instance is in the cluster.
3319

3320
    """
3321
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3322
    assert self.instance is not None, \
3323
      "Cannot retrieve locked instance %s" % self.op.instance_name
3324

    
3325
  def Exec(self, feedback_fn):
3326
    """Remove the instance.
3327

3328
    """
3329
    instance = self.instance
3330
    logging.info("Shutting down instance %s on node %s",
3331
                 instance.name, instance.primary_node)
3332

    
3333
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3334
    msg = result.RemoteFailMsg()
3335
    if msg:
3336
      if self.op.ignore_failures:
3337
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3338
      else:
3339
        raise errors.OpExecError("Could not shutdown instance %s on"
3340
                                 " node %s: %s" %
3341
                                 (instance.name, instance.primary_node, msg))
3342

    
3343
    logging.info("Removing block devices for instance %s", instance.name)
3344

    
3345
    if not _RemoveDisks(self, instance):
3346
      if self.op.ignore_failures:
3347
        feedback_fn("Warning: can't remove instance's disks")
3348
      else:
3349
        raise errors.OpExecError("Can't remove instance's disks")
3350

    
3351
    logging.info("Removing instance %s out of cluster config", instance.name)
3352

    
3353
    self.cfg.RemoveInstance(instance.name)
3354
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3355
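# Note (descriptive comment added for clarity): with ignore_failures set, both
# a failed instance shutdown and a failed disk removal are downgraded to
# warnings and the instance is still removed from the configuration; without
# it, either failure aborts the operation before the configuration is touched.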

    
3356

    
3357
class LUQueryInstances(NoHooksLU):
3358
  """Logical unit for querying instances.
3359

3360
  """
3361
  _OP_REQP = ["output_fields", "names", "use_locking"]
3362
  REQ_BGL = False
3363
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3364
                                    "admin_state",
3365
                                    "disk_template", "ip", "mac", "bridge",
3366
                                    "sda_size", "sdb_size", "vcpus", "tags",
3367
                                    "network_port", "beparams",
3368
                                    r"(disk)\.(size)/([0-9]+)",
3369
                                    r"(disk)\.(sizes)", "disk_usage",
3370
                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
3371
                                    r"(nic)\.(macs|ips|bridges)",
3372
                                    r"(disk|nic)\.(count)",
3373
                                    "serial_no", "hypervisor", "hvparams",] +
3374
                                  ["hv/%s" % name
3375
                                   for name in constants.HVS_PARAMETERS] +
3376
                                  ["be/%s" % name
3377
                                   for name in constants.BES_PARAMETERS])
3378
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3379

    
3380

    
3381
  def ExpandNames(self):
3382
    _CheckOutputFields(static=self._FIELDS_STATIC,
3383
                       dynamic=self._FIELDS_DYNAMIC,
3384
                       selected=self.op.output_fields)
3385

    
3386
    self.needed_locks = {}
3387
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3388
    self.share_locks[locking.LEVEL_NODE] = 1
3389

    
3390
    if self.op.names:
3391
      self.wanted = _GetWantedInstances(self, self.op.names)
3392
    else:
3393
      self.wanted = locking.ALL_SET
3394

    
3395
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3396
    self.do_locking = self.do_node_query and self.op.use_locking
3397
    if self.do_locking:
3398
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3399
      self.needed_locks[locking.LEVEL_NODE] = []
3400
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3401

    
3402
  def DeclareLocks(self, level):
3403
    if level == locking.LEVEL_NODE and self.do_locking:
3404
      self._LockInstancesNodes()
3405

    
3406
  def CheckPrereq(self):
3407
    """Check prerequisites.
3408

3409
    """
3410
    pass
3411

    
3412
  def Exec(self, feedback_fn):
3413
    """Computes the list of nodes and their attributes.
3414

3415
    """
3416
    all_info = self.cfg.GetAllInstancesInfo()
3417
    if self.wanted == locking.ALL_SET:
3418
      # caller didn't specify instance names, so ordering is not important
3419
      if self.do_locking:
3420
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3421
      else:
3422
        instance_names = all_info.keys()
3423
      instance_names = utils.NiceSort(instance_names)
3424
    else:
3425
      # caller did specify names, so we must keep the ordering
3426
      if self.do_locking:
3427
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3428
      else:
3429
        tgt_set = all_info.keys()
3430
      missing = set(self.wanted).difference(tgt_set)
3431
      if missing:
3432
        raise errors.OpExecError("Some instances were removed before"
3433
                                 " retrieving their data: %s" % missing)
3434
      instance_names = self.wanted
3435

    
3436
    instance_list = [all_info[iname] for iname in instance_names]
3437

    
3438
    # begin data gathering
3439

    
3440
    nodes = frozenset([inst.primary_node for inst in instance_list])
3441
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3442

    
3443
    bad_nodes = []
3444
    off_nodes = []
3445
    if self.do_node_query:
3446
      live_data = {}
3447
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3448
      for name in nodes:
3449
        result = node_data[name]
3450
        if result.offline:
3451
          # offline nodes will be in both lists
3452
          off_nodes.append(name)
3453
        if result.failed or result.RemoteFailMsg():
3454
          bad_nodes.append(name)
3455
        else:
3456
          if result.payload:
3457
            live_data.update(result.payload)
3458
          # else no instance is alive
3459
    else:
3460
      live_data = dict([(name, {}) for name in instance_names])
3461

    
3462
    # end data gathering
3463

    
3464
    HVPREFIX = "hv/"
3465
    BEPREFIX = "be/"
3466
    output = []
3467
    for instance in instance_list:
3468
      iout = []
3469
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
3470
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
3471
      for field in self.op.output_fields:
3472
        st_match = self._FIELDS_STATIC.Matches(field)
3473
        if field == "name":
3474
          val = instance.name
3475
        elif field == "os":
3476
          val = instance.os
3477
        elif field == "pnode":
3478
          val = instance.primary_node
3479
        elif field == "snodes":
3480
          val = list(instance.secondary_nodes)
3481
        elif field == "admin_state":
3482
          val = instance.admin_up
3483
        elif field == "oper_state":
3484
          if instance.primary_node in bad_nodes:
3485
            val = None
3486
          else:
3487
            val = bool(live_data.get(instance.name))
3488
        elif field == "status":
3489
          if instance.primary_node in off_nodes:
3490
            val = "ERROR_nodeoffline"
3491
          elif instance.primary_node in bad_nodes:
3492
            val = "ERROR_nodedown"
3493
          else:
3494
            running = bool(live_data.get(instance.name))
3495
            if running:
3496
              if instance.admin_up:
3497
                val = "running"
3498
              else:
3499
                val = "ERROR_up"
3500
            else:
3501
              if instance.admin_up:
3502
                val = "ERROR_down"
3503
              else:
3504
                val = "ADMIN_down"
3505
        elif field == "oper_ram":
3506
          if instance.primary_node in bad_nodes:
3507
            val = None
3508
          elif instance.name in live_data:
3509
            val = live_data[instance.name].get("memory", "?")
3510
          else:
3511
            val = "-"
3512
        elif field == "disk_template":
3513
          val = instance.disk_template
3514
        elif field == "ip":
3515
          val = instance.nics[0].ip
3516
        elif field == "bridge":
3517
          val = instance.nics[0].bridge
3518
        elif field == "mac":
3519
          val = instance.nics[0].mac
3520
        elif field == "sda_size" or field == "sdb_size":
3521
          idx = ord(field[2]) - ord('a')
3522
          try:
3523
            val = instance.FindDisk(idx).size
3524
          except errors.OpPrereqError:
3525
            val = None
3526
        elif field == "disk_usage": # total disk usage per node
3527
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3528
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3529
        elif field == "tags":
3530
          val = list(instance.GetTags())
3531
        elif field == "serial_no":
3532
          val = instance.serial_no
3533
        elif field == "network_port":
3534
          val = instance.network_port
3535
        elif field == "hypervisor":
3536
          val = instance.hypervisor
3537
        elif field == "hvparams":
3538
          val = i_hv
3539
        elif (field.startswith(HVPREFIX) and
3540
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3541
          val = i_hv.get(field[len(HVPREFIX):], None)
3542
        elif field == "beparams":
3543
          val = i_be
3544
        elif (field.startswith(BEPREFIX) and
3545
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3546
          val = i_be.get(field[len(BEPREFIX):], None)
3547
        elif st_match and st_match.groups():
3548
          # matches a variable list
3549
          st_groups = st_match.groups()
3550
          if st_groups and st_groups[0] == "disk":
3551
            if st_groups[1] == "count":
3552
              val = len(instance.disks)
3553
            elif st_groups[1] == "sizes":
3554
              val = [disk.size for disk in instance.disks]
3555
            elif st_groups[1] == "size":
3556
              try:
3557
                val = instance.FindDisk(st_groups[2]).size
3558
              except errors.OpPrereqError:
3559
                val = None
3560
            else:
3561
              assert False, "Unhandled disk parameter"
3562
          elif st_groups[0] == "nic":
3563
            if st_groups[1] == "count":
3564
              val = len(instance.nics)
3565
            elif st_groups[1] == "macs":
3566
              val = [nic.mac for nic in instance.nics]
3567
            elif st_groups[1] == "ips":
3568
              val = [nic.ip for nic in instance.nics]
3569
            elif st_groups[1] == "bridges":
3570
              val = [nic.bridge for nic in instance.nics]
3571
            else:
3572
              # index-based item
3573
              nic_idx = int(st_groups[2])
3574
              if nic_idx >= len(instance.nics):
3575
                val = None
3576
              else:
3577
                if st_groups[1] == "mac":
3578
                  val = instance.nics[nic_idx].mac
3579
                elif st_groups[1] == "ip":
3580
                  val = instance.nics[nic_idx].ip
3581
                elif st_groups[1] == "bridge":
3582
                  val = instance.nics[nic_idx].bridge
3583
                else:
3584
                  assert False, "Unhandled NIC parameter"
3585
          else:
3586
            assert False, "Unhandled variable parameter"
3587
        else:
3588
          raise errors.ParameterError(field)
3589
        iout.append(val)
3590
      output.append(iout)
3591

    
3592
    return output
3593
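# Illustrative sketch (not part of the original module): the static field set
# above accepts plain names as well as indexed and aggregate patterns, so a
# query could request, for example:
#
#   output_fields = ["name", "status", "oper_ram",
#                    "disk.sizes", "disk.size/0",
#                    "nic.macs", "nic.ip/1",
#                    "be/memory", "be/vcpus"]
#
# plus "hv/<name>" for any hypervisor parameter in constants.HVS_PARAMETERS.
# Indexed fields ("disk.size/0", "nic.ip/1") yield None when the index is out
# of range, while the aggregate forms ("disk.sizes", "nic.macs") return lists.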

    
3594

    
3595
class LUFailoverInstance(LogicalUnit):
3596
  """Failover an instance.
3597

3598
  """
3599
  HPATH = "instance-failover"
3600
  HTYPE = constants.HTYPE_INSTANCE
3601
  _OP_REQP = ["instance_name", "ignore_consistency"]
3602
  REQ_BGL = False
3603

    
3604
  def ExpandNames(self):
3605
    self._ExpandAndLockInstance()
3606
    self.needed_locks[locking.LEVEL_NODE] = []
3607
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3608

    
3609
  def DeclareLocks(self, level):
3610
    if level == locking.LEVEL_NODE:
3611
      self._LockInstancesNodes()
3612

    
3613
  def BuildHooksEnv(self):
3614
    """Build hooks env.
3615

3616
    This runs on master, primary and secondary nodes of the instance.
3617

3618
    """
3619
    env = {
3620
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3621
      }
3622
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3623
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3624
    return env, nl, nl
3625

    
3626
  def CheckPrereq(self):
3627
    """Check prerequisites.
3628

3629
    This checks that the instance is in the cluster.
3630

3631
    """
3632
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3633
    assert self.instance is not None, \
3634
      "Cannot retrieve locked instance %s" % self.op.instance_name
3635

    
3636
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3637
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3638
      raise errors.OpPrereqError("Instance's disk layout is not"
3639
                                 " network mirrored, cannot failover.")
3640

    
3641
    secondary_nodes = instance.secondary_nodes
3642
    if not secondary_nodes:
3643
      raise errors.ProgrammerError("no secondary node but using "
3644
                                   "a mirrored disk template")
3645

    
3646
    target_node = secondary_nodes[0]
3647
    _CheckNodeOnline(self, target_node)
3648
    _CheckNodeNotDrained(self, target_node)
3649
    # check memory requirements on the secondary node
3650
    _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3651
                         instance.name, bep[constants.BE_MEMORY],
3652
                         instance.hypervisor)
3653
    # check bridge existence
3654
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3655

    
3656
  def Exec(self, feedback_fn):
3657
    """Failover an instance.
3658

3659
    The failover is done by shutting it down on its present node and
3660
    starting it on the secondary.
3661

3662
    """
3663
    instance = self.instance
3664

    
3665
    source_node = instance.primary_node
3666
    target_node = instance.secondary_nodes[0]
3667

    
3668
    feedback_fn("* checking disk consistency between source and target")
3669
    for dev in instance.disks:
3670
      # for drbd, these are drbd over lvm
3671
      if not _CheckDiskConsistency(self, dev, target_node, False):
3672
        if instance.admin_up and not self.op.ignore_consistency:
3673
          raise errors.OpExecError("Disk %s is degraded on target node,"
3674
                                   " aborting failover." % dev.iv_name)
3675

    
3676
    feedback_fn("* shutting down instance on source node")
3677
    logging.info("Shutting down instance %s on node %s",
3678
                 instance.name, source_node)
3679

    
3680
    result = self.rpc.call_instance_shutdown(source_node, instance)
3681
    msg = result.RemoteFailMsg()
3682
    if msg:
3683
      if self.op.ignore_consistency:
3684
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3685
                             " Proceeding anyway. Please make sure node"
3686
                             " %s is down. Error details: %s",
3687
                             instance.name, source_node, source_node, msg)
3688
      else:
3689
        raise errors.OpExecError("Could not shutdown instance %s on"
3690
                                 " node %s: %s" %
3691
                                 (instance.name, source_node, msg))
3692

    
3693
    feedback_fn("* deactivating the instance's disks on source node")
3694
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3695
      raise errors.OpExecError("Can't shut down the instance's disks.")
3696

    
3697
    instance.primary_node = target_node
3698
    # distribute new instance config to the other nodes
3699
    self.cfg.Update(instance)
3700

    
3701
    # Only start the instance if it's marked as up
3702
    if instance.admin_up:
3703
      feedback_fn("* activating the instance's disks on target node")
3704
      logging.info("Starting instance %s on node %s",
3705
                   instance.name, target_node)
3706

    
3707
      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
3708
                                               ignore_secondaries=True)
3709
      if not disks_ok:
3710
        _ShutdownInstanceDisks(self, instance)
3711
        raise errors.OpExecError("Can't activate the instance's disks")
3712

    
3713
      feedback_fn("* starting the instance on the target node")
3714
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3715
      msg = result.RemoteFailMsg()
3716
      if msg:
3717
        _ShutdownInstanceDisks(self, instance)
3718
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3719
                                 (instance.name, target_node, msg))
3720
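# Note (descriptive comment added for clarity): failover is only allowed for
# network-mirrored disk templates and always targets the single secondary
# node. The sequence is: check disk consistency, shut the instance down on the
# current primary, deactivate its disks, flip primary_node in the
# configuration, and then re-activate the disks and start the instance on the
# new primary only if it was marked as up.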

    
3721

    
3722
class LUMigrateInstance(LogicalUnit):
3723
  """Migrate an instance.
3724

3725
  This is migration without shutting down, compared to the failover,
3726
  which is done with shutdown.
3727

3728
  """
3729
  HPATH = "instance-migrate"
3730
  HTYPE = constants.HTYPE_INSTANCE
3731
  _OP_REQP = ["instance_name", "live", "cleanup"]
3732

    
3733
  REQ_BGL = False
3734

    
3735
  def ExpandNames(self):
3736
    self._ExpandAndLockInstance()
3737
    self.needed_locks[locking.LEVEL_NODE] = []
3738
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3739

    
3740
  def DeclareLocks(self, level):
3741
    if level == locking.LEVEL_NODE:
3742
      self._LockInstancesNodes()
3743

    
3744
  def BuildHooksEnv(self):
3745
    """Build hooks env.
3746

3747
    This runs on master, primary and secondary nodes of the instance.
3748

3749
    """
3750
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3751
    env["MIGRATE_LIVE"] = self.op.live
3752
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3753
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3754
    return env, nl, nl
3755

    
3756
  def CheckPrereq(self):
3757
    """Check prerequisites.
3758

3759
    This checks that the instance is in the cluster.
3760

3761
    """
3762
    instance = self.cfg.GetInstanceInfo(
3763
      self.cfg.ExpandInstanceName(self.op.instance_name))
3764
    if instance is None:
3765
      raise errors.OpPrereqError("Instance '%s' not known" %
3766
                                 self.op.instance_name)
3767

    
3768
    if instance.disk_template != constants.DT_DRBD8:
3769
      raise errors.OpPrereqError("Instance's disk layout is not"
3770
                                 " drbd8, cannot migrate.")
3771

    
3772
    secondary_nodes = instance.secondary_nodes
3773
    if not secondary_nodes:
3774
      raise errors.ConfigurationError("No secondary node but using"
3775
                                      " drbd8 disk template")
3776

    
3777
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3778

    
3779
    target_node = secondary_nodes[0]
3780
    # check memory requirements on the secondary node
3781
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3782
                         instance.name, i_be[constants.BE_MEMORY],
3783
                         instance.hypervisor)
3784

    
3785
    # check bridge existence
3786
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3787

    
3788
    if not self.op.cleanup:
3789
      _CheckNodeNotDrained(self, target_node)
3790
      result = self.rpc.call_instance_migratable(instance.primary_node,
3791
                                                 instance)
3792
      msg = result.RemoteFailMsg()
3793
      if msg:
3794
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
3795
                                   msg)
3796

    
3797
    self.instance = instance
3798

    
3799
  def _WaitUntilSync(self):
3800
    """Poll with custom rpc for disk sync.
3801

3802
    This uses our own step-based rpc call.
3803

3804
    """
3805
    self.feedback_fn("* wait until resync is done")
3806
    all_done = False
3807
    while not all_done:
3808
      all_done = True
3809
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3810
                                            self.nodes_ip,
3811
                                            self.instance.disks)
3812
      min_percent = 100
3813
      for node, nres in result.items():
3814
        msg = nres.RemoteFailMsg()
3815
        if msg:
3816
          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
3817
                                   (node, msg))
3818
        node_done, node_percent = nres.payload
3819
        all_done = all_done and node_done
3820
        if node_percent is not None:
3821
          min_percent = min(min_percent, node_percent)
3822
      if not all_done:
3823
        if min_percent < 100:
3824
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
3825
        time.sleep(2)
3826

    
3827
  def _EnsureSecondary(self, node):
3828
    """Demote a node to secondary.
3829

3830
    """
3831
    self.feedback_fn("* switching node %s to secondary mode" % node)
3832

    
3833
    for dev in self.instance.disks:
3834
      self.cfg.SetDiskID(dev, node)
3835

    
3836
    result = self.rpc.call_blockdev_close(node, self.instance.name,
3837
                                          self.instance.disks)
3838
    msg = result.RemoteFailMsg()
3839
    if msg:
3840
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
3841
                               " error %s" % (node, msg))
3842

    
3843
  def _GoStandalone(self):
3844
    """Disconnect from the network.
3845

3846
    """
3847
    self.feedback_fn("* changing into standalone mode")
3848
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3849
                                               self.instance.disks)
3850
    for node, nres in result.items():
3851
      msg = nres.RemoteFailMsg()
3852
      if msg:
3853
        raise errors.OpExecError("Cannot disconnect disks on node %s,"
3854
                                 " error %s" % (node, msg))
3855

    
3856
  def _GoReconnect(self, multimaster):
3857
    """Reconnect to the network.
3858

3859
    """
3860
    if multimaster:
3861
      msg = "dual-master"
3862
    else:
3863
      msg = "single-master"
3864
    self.feedback_fn("* changing disks into %s mode" % msg)
3865
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3866
                                           self.instance.disks,
3867
                                           self.instance.name, multimaster)
3868
    for node, nres in result.items():
3869
      msg = nres.RemoteFailMsg()
3870
      if msg:
3871
        raise errors.OpExecError("Cannot change disks config on node %s,"
3872
                                 " error: %s" % (node, msg))
3873

    
3874
  def _ExecCleanup(self):
3875
    """Try to cleanup after a failed migration.
3876

3877
    The cleanup is done by:
3878
      - check that the instance is running only on one node
3879
        (and update the config if needed)
3880
      - change disks on its secondary node to secondary
3881
      - wait until disks are fully synchronized
3882
      - disconnect from the network
3883
      - change disks into single-master mode
3884
      - wait again until disks are fully synchronized
3885

3886
    """
3887
    instance = self.instance
3888
    target_node = self.target_node
3889
    source_node = self.source_node
3890

    
3891
    # check running on only one node
3892
    self.feedback_fn("* checking where the instance actually runs"
3893
                     " (if this hangs, the hypervisor might be in"
3894
                     " a bad state)")
3895
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3896
    for node, result in ins_l.items():
3897
      msg = result.RemoteFailMsg()
3898
      if msg:
3899
        raise errors.OpExecError("Can't contact node %s: %s" % (node, msg))
3900

    
3901
    runningon_source = instance.name in ins_l[source_node].payload
3902
    runningon_target = instance.name in ins_l[target_node].payload
3903

    
3904
    if runningon_source and runningon_target:
3905
      raise errors.OpExecError("Instance seems to be running on two nodes,"
3906
                               " or the hypervisor is confused. You will have"
3907
                               " to ensure manually that it runs only on one"
3908
                               " and restart this operation.")
3909

    
3910
    if not (runningon_source or runningon_target):
3911
      raise errors.OpExecError("Instance does not seem to be running at all."
3912
                               " In this case, it's safer to repair by"
3913
                               " running 'gnt-instance stop' to ensure disk"
3914
                               " shutdown, and then restarting it.")
3915

    
3916
    if runningon_target:
3917
      # the migration has actually succeeded, we need to update the config
3918
      self.feedback_fn("* instance running on secondary node (%s),"
3919
                       " updating config" % target_node)
3920
      instance.primary_node = target_node
3921
      self.cfg.Update(instance)
3922
      demoted_node = source_node
3923
    else:
3924
      self.feedback_fn("* instance confirmed to be running on its"
3925
                       " primary node (%s)" % source_node)
3926
      demoted_node = target_node
3927

    
3928
    self._EnsureSecondary(demoted_node)
3929
    try:
3930
      self._WaitUntilSync()
3931
    except errors.OpExecError:
3932
      # we ignore here errors, since if the device is standalone, it
3933
      # won't be able to sync
3934
      pass
3935
    self._GoStandalone()
3936
    self._GoReconnect(False)
3937
    self._WaitUntilSync()
3938

    
3939
    self.feedback_fn("* done")
3940

    
3941
  def _RevertDiskStatus(self):
3942
    """Try to revert the disk status after a failed migration.
3943

3944
    """
3945
    target_node = self.target_node
3946
    try:
3947
      self._EnsureSecondary(target_node)
3948
      self._GoStandalone()
3949
      self._GoReconnect(False)
3950
      self._WaitUntilSync()
3951
    except errors.OpExecError, err:
3952
      self.LogWarning("Migration failed and I can't reconnect the"
3953
                      " drives: error '%s'\n"
3954
                      "Please look and recover the instance status" %
3955
                      str(err))
3956

    
3957
  def _AbortMigration(self):
3958
    """Call the hypervisor code to abort a started migration.
3959

3960
    """
3961
    instance = self.instance
3962
    target_node = self.target_node
3963
    migration_info = self.migration_info
3964

    
3965
    abort_result = self.rpc.call_finalize_migration(target_node,
3966
                                                    instance,
3967
                                                    migration_info,
3968
                                                    False)
3969
    abort_msg = abort_result.RemoteFailMsg()
3970
    if abort_msg:
3971
      logging.error("Aborting migration failed on target node %s: %s" %
3972
                    (target_node, abort_msg))
3973
      # Don't raise an exception here, as we still have to try to revert the
3974
      # disk status, even if this step failed.
3975

    
3976
  def _ExecMigration(self):
3977
    """Migrate an instance.
3978

3979
    The migrate is done by:
3980
      - change the disks into dual-master mode
3981
      - wait until disks are fully synchronized again
3982
      - migrate the instance
3983
      - change disks on the new secondary node (the old primary) to secondary
3984
      - wait until disks are fully synchronized
3985
      - change disks into single-master mode
3986

3987
    """
3988
    instance = self.instance
3989
    target_node = self.target_node
3990
    source_node = self.source_node
3991

    
3992
    self.feedback_fn("* checking disk consistency between source and target")
3993
    for dev in instance.disks:
3994
      if not _CheckDiskConsistency(self, dev, target_node, False):
3995
        raise errors.OpExecError("Disk %s is degraded or not fully"
3996
                                 " synchronized on target node,"
3997
                                 " aborting migrate." % dev.iv_name)
3998

    
3999
    # First get the migration information from the remote node
4000
    result = self.rpc.call_migration_info(source_node, instance)
4001
    msg = result.RemoteFailMsg()
4002
    if msg:
4003
      log_err = ("Failed fetching source migration information from %s: %s" %
4004
                 (source_node, msg))
4005
      logging.error(log_err)
4006
      raise errors.OpExecError(log_err)
4007

    
4008
    self.migration_info = migration_info = result.payload
4009

    
4010
    # Then switch the disks to master/master mode
4011
    self._EnsureSecondary(target_node)
4012
    self._GoStandalone()
4013
    self._GoReconnect(True)
4014
    self._WaitUntilSync()
4015

    
4016
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4017
    result = self.rpc.call_accept_instance(target_node,
4018
                                           instance,
4019
                                           migration_info,
4020
                                           self.nodes_ip[target_node])
4021

    
4022
    msg = result.RemoteFailMsg()
4023
    if msg:
4024
      logging.error("Instance pre-migration failed, trying to revert"
4025
                    " disk status: %s", msg)
4026
      self._AbortMigration()
4027
      self._RevertDiskStatus()
4028
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4029
                               (instance.name, msg))
4030

    
4031
    self.feedback_fn("* migrating instance to %s" % target_node)
4032
    time.sleep(10)
4033
    result = self.rpc.call_instance_migrate(source_node, instance,
4034
                                            self.nodes_ip[target_node],
4035
                                            self.op.live)
4036
    msg = result.RemoteFailMsg()
4037
    if msg:
4038
      logging.error("Instance migration failed, trying to revert"
4039
                    " disk status: %s", msg)
4040
      self._AbortMigration()
4041
      self._RevertDiskStatus()
4042
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4043
                               (instance.name, msg))
4044
    time.sleep(10)
4045

    
4046
    instance.primary_node = target_node
4047
    # distribute new instance config to the other nodes
4048
    self.cfg.Update(instance)
4049

    
4050
    result = self.rpc.call_finalize_migration(target_node,
4051
                                              instance,
4052
                                              migration_info,
4053
                                              True)
4054
    msg = result.RemoteFailMsg()
4055
    if msg:
4056
      logging.error("Instance migration succeeded, but finalization failed:"
4057
                    " %s" % msg)
4058
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4059
                               msg)
4060

    
4061
    self._EnsureSecondary(source_node)
4062
    self._WaitUntilSync()
4063
    self._GoStandalone()
4064
    self._GoReconnect(False)
4065
    self._WaitUntilSync()
4066

    
4067
    self.feedback_fn("* done")
4068

    
4069
  def Exec(self, feedback_fn):
4070
    """Perform the migration.
4071

4072
    """
4073
    self.feedback_fn = feedback_fn
4074

    
4075
    self.source_node = self.instance.primary_node
4076
    self.target_node = self.instance.secondary_nodes[0]
4077
    self.all_nodes = [self.source_node, self.target_node]
4078
    self.nodes_ip = {
4079
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4080
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4081
      }
4082
    if self.op.cleanup:
4083
      return self._ExecCleanup()
4084
    else:
4085
      return self._ExecMigration()
4086
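# Note (descriptive comment added for clarity): live migration is only
# supported for DRBD8-based instances. _ExecMigration switches the disks into
# dual-master mode, migrates the domain, then demotes the old primary and
# returns to single-master mode; the cleanup mode (_ExecCleanup) is meant for
# a failed or interrupted migration and only re-establishes a consistent
# single-master disk setup around whichever node the instance actually runs
# on.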

    
4087

    
4088
def _CreateBlockDev(lu, node, instance, device, force_create,
4089
                    info, force_open):
4090
  """Create a tree of block devices on a given node.
4091

4092
  If this device type has to be created on secondaries, create it and
4093
  all its children.
4094

4095
  If not, just recurse to children keeping the same 'force' value.
4096

4097
  @param lu: the lu on whose behalf we execute
4098
  @param node: the node on which to create the device
4099
  @type instance: L{objects.Instance}
4100
  @param instance: the instance which owns the device
4101
  @type device: L{objects.Disk}
4102
  @param device: the device to create
4103
  @type force_create: boolean
4104
  @param force_create: whether to force creation of this device; this
4105
      will be changed to True whenever we find a device which has
4106
      CreateOnSecondary() attribute
4107
  @param info: the extra 'metadata' we should attach to the device
4108
      (this will be represented as a LVM tag)
4109
  @type force_open: boolean
4110
  @param force_open: this parameter will be passed to the
4111
      L{backend.BlockdevCreate} function where it specifies
4112
      whether we run on primary or not, and it affects both
4113
      the child assembly and the device's own Open() execution
4114

4115
  """
4116
  if device.CreateOnSecondary():
4117
    force_create = True
4118

    
4119
  if device.children:
4120
    for child in device.children:
4121
      _CreateBlockDev(lu, node, instance, child, force_create,
4122
                      info, force_open)
4123

    
4124
  if not force_create:
4125
    return
4126

    
4127
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
4128

    
4129

    
4130
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
4131
  """Create a single block device on a given node.
4132

4133
  This will not recurse over children of the device, so they must be
4134
  created in advance.
4135

4136
  @param lu: the lu on whose behalf we execute
4137
  @param node: the node on which to create the device
4138
  @type instance: L{objects.Instance}
4139
  @param instance: the instance which owns the device
4140
  @type device: L{objects.Disk}
4141
  @param device: the device to create
4142
  @param info: the extra 'metadata' we should attach to the device
4143
      (this will be represented as a LVM tag)
4144
  @type force_open: boolean
4145
  @param force_open: this parameter will be passed to the
4146
      L{backend.BlockdevCreate} function where it specifies
4147
      whether we run on primary or not, and it affects both
4148
      the child assembly and the device's own Open() execution
4149

4150
  """
4151
  lu.cfg.SetDiskID(device, node)
4152
  result = lu.rpc.call_blockdev_create(node, device, device.size,
4153
                                       instance.name, force_open, info)
4154
  msg = result.RemoteFailMsg()
4155
  if msg:
4156
    raise errors.OpExecError("Can't create block device %s on"
4157
                             " node %s for instance %s: %s" %
4158
                             (device, node, instance.name, msg))
4159
  if device.physical_id is None:
4160
    device.physical_id = result.payload
4161

    
4162

    
4163
def _GenerateUniqueNames(lu, exts):
4164
  """Generate a suitable LV name.
4165

4166
  This will generate a logical volume name for the given instance.
4167

4168
  """
4169
  results = []
4170
  for val in exts:
4171
    new_id = lu.cfg.GenerateUniqueID()
4172
    results.append("%s%s" % (new_id, val))
4173
  return results
4174

    
4175

    
4176
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4177
                         p_minor, s_minor):
4178
  """Generate a drbd8 device complete with its children.
4179

4180
  """
4181
  port = lu.cfg.AllocatePort()
4182
  vgname = lu.cfg.GetVGName()
4183
  shared_secret = lu.cfg.GenerateDRBDSecret()
4184
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4185
                          logical_id=(vgname, names[0]))
4186
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4187
                          logical_id=(vgname, names[1]))
4188
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4189
                          logical_id=(primary, secondary, port,
4190
                                      p_minor, s_minor,
4191
                                      shared_secret),
4192
                          children=[dev_data, dev_meta],
4193
                          iv_name=iv_name)
4194
  return drbd_dev
4195
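# Illustrative sketch (not part of the original module): for one DRBD disk the
# helper above produces a two-level objects.Disk tree, conceptually:
#
#   drbd_dev  (LD_DRBD8, logical_id=(primary, secondary, port,
#                                    p_minor, s_minor, shared_secret))
#     |- dev_data  (LD_LV, size=<requested size>)
#     |- dev_meta  (LD_LV, size=128)
#
# i.e. a data LV plus a small fixed-size (128 MiB) metadata LV as children of
# the DRBD8 device, with the port, minors and shared secret allocated from the
# cluster configuration.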

    
4196

    
4197
def _GenerateDiskTemplate(lu, template_name,
4198
                          instance_name, primary_node,
4199
                          secondary_nodes, disk_info,
4200
                          file_storage_dir, file_driver,
4201
                          base_index):
4202
  """Generate the entire disk layout for a given template type.
4203

4204
  """
4205
  #TODO: compute space requirements
4206

    
4207
  vgname = lu.cfg.GetVGName()
4208
  disk_count = len(disk_info)
4209
  disks = []
4210
  if template_name == constants.DT_DISKLESS:
4211
    pass
4212
  elif template_name == constants.DT_PLAIN:
4213
    if len(secondary_nodes) != 0:
4214
      raise errors.ProgrammerError("Wrong template configuration")
4215

    
4216
    names = _GenerateUniqueNames(lu, [".disk%d" % i
4217
                                      for i in range(disk_count)])
4218
    for idx, disk in enumerate(disk_info):
4219
      disk_index = idx + base_index
4220
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4221
                              logical_id=(vgname, names[idx]),
4222
                              iv_name="disk/%d" % disk_index,
4223
                              mode=disk["mode"])
4224
      disks.append(disk_dev)
4225
  elif template_name == constants.DT_DRBD8:
4226
    if len(secondary_nodes) != 1:
4227
      raise errors.ProgrammerError("Wrong template configuration")
4228
    remote_node = secondary_nodes[0]
4229
    minors = lu.cfg.AllocateDRBDMinor(
4230
      [primary_node, remote_node] * len(disk_info), instance_name)
4231

    
4232
    names = []
4233
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
4234
                                               for i in range(disk_count)]):
4235
      names.append(lv_prefix + "_data")
4236
      names.append(lv_prefix + "_meta")
4237
    for idx, disk in enumerate(disk_info):
4238
      disk_index = idx + base_index
4239
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4240
                                      disk["size"], names[idx*2:idx*2+2],
4241
                                      "disk/%d" % disk_index,
4242
                                      minors[idx*2], minors[idx*2+1])
4243
      disk_dev.mode = disk["mode"]
4244
      disks.append(disk_dev)
4245
  elif template_name == constants.DT_FILE:
4246
    if len(secondary_nodes) != 0:
4247
      raise errors.ProgrammerError("Wrong template configuration")
4248

    
4249
    for idx, disk in enumerate(disk_info):
4250
      disk_index = idx + base_index
4251
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4252
                              iv_name="disk/%d" % disk_index,
4253
                              logical_id=(file_driver,
4254
                                          "%s/disk%d" % (file_storage_dir,
4255
                                                         disk_index)),
4256
                              mode=disk["mode"])
4257
      disks.append(disk_dev)
4258
  else:
4259
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4260
  return disks
4261
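# Illustrative sketch (not part of the original module): for a plain LVM
# template the helper above could be called roughly as (example values):
#
#   disks = _GenerateDiskTemplate(self, constants.DT_PLAIN,
#                                 instance_name, pnode_name, [],
#                                 [{"size": 10240, "mode": "rw"}],
#                                 None, None, 0)
#
# which returns a single LD_LV objects.Disk with iv_name "disk/0". The DRBD8
# template additionally requires exactly one secondary node, and the file
# template uses the file_storage_dir/file_driver arguments instead of the
# volume group.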

    
4262

    
4263
def _GetInstanceInfoText(instance):
4264
  """Compute the text that should be added to the disk's metadata.
4265

4266
  """
4267
  return "originstname+%s" % instance.name
4268

    
4269

    
4270
def _CreateDisks(lu, instance):
4271
  """Create all disks for an instance.
4272

4273
  This abstracts away some work from AddInstance.
4274

4275
  @type lu: L{LogicalUnit}
4276
  @param lu: the logical unit on whose behalf we execute
4277
  @type instance: L{objects.Instance}
4278
  @param instance: the instance whose disks we should create
4279
  @rtype: None
4280
  @raise errors.OpExecError: if the creation of any disk fails
4281

4282
  """
4283
  info = _GetInstanceInfoText(instance)
4284
  pnode = instance.primary_node
4285

    
4286
  if instance.disk_template == constants.DT_FILE:
4287
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4288
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
4289

    
4290
    if result.failed or not result.data:
4291
      raise errors.OpExecError("Could not connect to node '%s'" % pnode)
4292

    
4293
    if not result.data[0]:
4294
      raise errors.OpExecError("Failed to create directory '%s'" %
4295
                               file_storage_dir)
4296

    
4297
  # Note: this needs to be kept in sync with adding of disks in
4298
  # LUSetInstanceParams
4299
  for device in instance.disks:
4300
    logging.info("Creating volume %s for instance %s",
4301
                 device.iv_name, instance.name)
4302
    #HARDCODE
4303
    for node in instance.all_nodes:
4304
      f_create = node == pnode
4305
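      # note (added comment): f_create is used both as the force-create and
      # as the force-open argument of _CreateBlockDev below, so the full
      # device tree is only forced into existence (and opened) on the
      # primary node; the other nodes only get the pieces that must live
      # there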
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
4306

    
4307

    
4308
def _RemoveDisks(lu, instance):
4309
  """Remove all disks for an instance.
4310

4311
  This abstracts away some work from `AddInstance()` and
4312
  `RemoveInstance()`. Note that in case some of the devices couldn't
4313
  be removed, the removal will continue with the other ones (compare
4314
  with `_CreateDisks()`).
4315

4316
  @type lu: L{LogicalUnit}
4317
  @param lu: the logical unit on whose behalf we execute
4318
  @type instance: L{objects.Instance}
4319
  @param instance: the instance whose disks we should remove
4320
  @rtype: boolean
4321
  @return: the success of the removal
4322

4323
  """
4324
  logging.info("Removing block devices for instance %s", instance.name)
4325

    
4326
  all_result = True
4327
  for device in instance.disks:
4328
    for node, disk in device.ComputeNodeTree(instance.primary_node):
4329
      lu.cfg.SetDiskID(disk, node)
4330
      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
4331
      if msg:
4332
        lu.LogWarning("Could not remove block device %s on node %s,"
4333
                      " continuing anyway: %s", device.iv_name, node, msg)
4334
        all_result = False
4335

    
4336
  if instance.disk_template == constants.DT_FILE:
4337
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
4338
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
4339
                                                 file_storage_dir)
4340
    if result.failed or not result.data:
4341
      logging.error("Could not remove directory '%s'", file_storage_dir)
4342
      all_result = False
4343

    
4344
  return all_result
4345

    
4346

    
4347
def _ComputeDiskSize(disk_template, disks):
4348
  """Compute disk size requirements in the volume group
4349

4350
  """
4351
  # Required free disk space as a function of the requested disks
4352
  req_size_dict = {
4353
    constants.DT_DISKLESS: None,
4354
    constants.DT_PLAIN: sum(d["size"] for d in disks),
4355
    # 128 MB are added for drbd metadata for each disk
4356
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
4357
    constants.DT_FILE: None,
4358
  }
4359

    
4360
  if disk_template not in req_size_dict:
4361
    raise errors.ProgrammerError("Disk template '%s' size requirement"
4362
                                 " is unknown" %  disk_template)
4363

    
4364
  return req_size_dict[disk_template]
4365
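# Worked example (added comment, not from the original source): for
# disks=[{"size": 1024}, {"size": 512}] the helper above returns
#
#   DT_PLAIN:  1024 + 512                 = 1536 MB
#   DT_DRBD8:  (1024 + 128) + (512 + 128) = 1792 MB (DRBD metadata included)
#   DT_DISKLESS, DT_FILE: None (no volume group space is needed)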

    
4366

    
4367
def _CheckHVParams(lu, nodenames, hvname, hvparams):
4368
  """Hypervisor parameter validation.
4369

4370
  This function abstracts the hypervisor parameter validation to be
4371
  used in both instance create and instance modify.
4372

4373
  @type lu: L{LogicalUnit}
4374
  @param lu: the logical unit for which we check
4375
  @type nodenames: list
4376
  @param nodenames: the list of nodes on which we should check
4377
  @type hvname: string
4378
  @param hvname: the name of the hypervisor we should use
4379
  @type hvparams: dict
4380
  @param hvparams: the parameters which we need to check
4381
  @raise errors.OpPrereqError: if the parameters are not valid
4382

4383
  """
4384
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
4385
                                                  hvname,
4386
                                                  hvparams)
4387
  for node in nodenames:
4388
    info = hvinfo[node]
4389
    if info.offline:
4390
      continue
4391
    msg = info.RemoteFailMsg()
4392
    if msg:
4393
      raise errors.OpPrereqError("Hypervisor parameter validation"
4394
                                 " failed on node %s: %s" % (node, msg))
4395

    
4396

    
4397
class LUCreateInstance(LogicalUnit):
4398
  """Create an instance.
4399

4400
  """
4401
  HPATH = "instance-add"
4402
  HTYPE = constants.HTYPE_INSTANCE
4403
  _OP_REQP = ["instance_name", "disks", "disk_template",
4404
              "mode", "start",
4405
              "wait_for_sync", "ip_check", "nics",
4406
              "hvparams", "beparams"]
4407
  REQ_BGL = False
4408

    
4409
  def _ExpandNode(self, node):
4410
    """Expands and checks one node name.
4411

4412
    """
4413
    node_full = self.cfg.ExpandNodeName(node)
4414
    if node_full is None:
4415
      raise errors.OpPrereqError("Unknown node %s" % node)
4416
    return node_full
4417

    
4418
  def ExpandNames(self):
4419
    """ExpandNames for CreateInstance.
4420

4421
    Figure out the right locks for instance creation.
4422

4423
    """
4424
    self.needed_locks = {}
4425

    
4426
    # set optional parameters to none if they don't exist
4427
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4428
      if not hasattr(self.op, attr):
4429
        setattr(self.op, attr, None)
4430

    
4431
    # cheap checks, mostly valid constants given
4432

    
4433
    # verify creation mode
4434
    if self.op.mode not in (constants.INSTANCE_CREATE,
4435
                            constants.INSTANCE_IMPORT):
4436
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4437
                                 self.op.mode)
4438

    
4439
    # disk template and mirror node verification
4440
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4441
      raise errors.OpPrereqError("Invalid disk template name")
4442

    
4443
    if self.op.hypervisor is None:
4444
      self.op.hypervisor = self.cfg.GetHypervisorType()
4445

    
4446
    cluster = self.cfg.GetClusterInfo()
4447
    enabled_hvs = cluster.enabled_hypervisors
4448
    if self.op.hypervisor not in enabled_hvs:
4449
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4450
                                 " cluster (%s)" % (self.op.hypervisor,
4451
                                  ",".join(enabled_hvs)))
4452

    
4453
    # check hypervisor parameter syntax (locally)
4454
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4455
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4456
                                  self.op.hvparams)
4457
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4458
    hv_type.CheckParameterSyntax(filled_hvp)
4459

    
4460
    # fill and remember the beparams dict
4461
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4462
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4463
                                    self.op.beparams)
4464

    
4465
    #### instance parameters check
4466

    
4467
    # instance name verification
4468
    hostname1 = utils.HostInfo(self.op.instance_name)
4469
    self.op.instance_name = instance_name = hostname1.name
4470

    
4471
    # this is just a preventive check, but someone might still add this
4472
    # instance in the meantime, and creation will fail at lock-add time
4473
    if instance_name in self.cfg.GetInstanceList():
4474
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4475
                                 instance_name)
4476

    
4477
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4478

    
4479
    # NIC buildup
4480
    self.nics = []
4481
    for idx, nic in enumerate(self.op.nics):
4482
      nic_mode_req = nic.get("mode", None)
4483
      nic_mode = nic_mode_req
4484
      if nic_mode is None:
4485
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4486

    
4487
      # in routed mode, for the first nic, the default ip is 'auto'
4488
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4489
        default_ip_mode = constants.VALUE_AUTO
4490
      else:
4491
        default_ip_mode = constants.VALUE_NONE
4492

    
4493
      # ip validity checks
4494
      ip = nic.get("ip", default_ip_mode)
4495
      if ip is None or ip.lower() == constants.VALUE_NONE:
4496
        nic_ip = None
4497
      elif ip.lower() == constants.VALUE_AUTO:
4498
        nic_ip = hostname1.ip
4499
      else:
4500
        if not utils.IsValidIP(ip):
4501
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4502
                                     " like a valid IP" % ip)
4503
        nic_ip = ip
4504

    
4505
      # TODO: check the ip for uniqueness !!
4506
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4507
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4508

    
4509
      # MAC address verification
4510
      mac = nic.get("mac", constants.VALUE_AUTO)
4511
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4512
        if not utils.IsValidMac(mac.lower()):
4513
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4514
                                     mac)
4515
      # bridge verification
4516
      bridge = nic.get("bridge", None)
4517
      link = nic.get("link", None)
4518
      if bridge and link:
4519
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
4520
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
4521
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
4522
      elif bridge:
4523
        link = bridge
4524

    
4525
      nicparams = {}
4526
      if nic_mode_req:
4527
        nicparams[constants.NIC_MODE] = nic_mode_req
4528
      if link:
4529
        nicparams[constants.NIC_LINK] = link
4530

    
4531
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4532
                                      nicparams)
4533
      objects.NIC.CheckParameterSyntax(check_params)
4534
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
4535

    
4536
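    # Examples of nic dicts handled above (added comment, values made up):
    #   {"mode": "bridged", "link": "xen-br0"}   - bridged nic on that bridge
    #   {"bridge": "xen-br0"}                    - legacy form of the same
    #   {"mode": "routed", "ip": "198.51.100.7"} - routed nics need an ip
    #                                              (or ip "auto" for nic 0)
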
    # disk checks/pre-build
4537
    self.disks = []
4538
    for disk in self.op.disks:
4539
      mode = disk.get("mode", constants.DISK_RDWR)
4540
      if mode not in constants.DISK_ACCESS_SET:
4541
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4542
                                   mode)
4543
      size = disk.get("size", None)
4544
      if size is None:
4545
        raise errors.OpPrereqError("Missing disk size")
4546
      try:
4547
        size = int(size)
4548
      except ValueError:
4549
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4550
      self.disks.append({"size": size, "mode": mode})
4551

    
4552
    # used in CheckPrereq for ip ping check
4553
    self.check_ip = hostname1.ip
4554

    
4555
    # file storage checks
4556
    if (self.op.file_driver and
4557
        not self.op.file_driver in constants.FILE_DRIVER):
4558
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4559
                                 self.op.file_driver)
4560

    
4561
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4562
      raise errors.OpPrereqError("File storage directory path not absolute")
4563

    
4564
    ### Node/iallocator related checks
4565
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4566
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4567
                                 " node must be given")
4568

    
4569
    if self.op.iallocator:
4570
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4571
    else:
4572
      self.op.pnode = self._ExpandNode(self.op.pnode)
4573
      nodelist = [self.op.pnode]
4574
      if self.op.snode is not None:
4575
        self.op.snode = self._ExpandNode(self.op.snode)
4576
        nodelist.append(self.op.snode)
4577
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4578

    
4579
    # in case of import lock the source node too
4580
    if self.op.mode == constants.INSTANCE_IMPORT:
4581
      src_node = getattr(self.op, "src_node", None)
4582
      src_path = getattr(self.op, "src_path", None)
4583

    
4584
      if src_path is None:
4585
        self.op.src_path = src_path = self.op.instance_name
4586

    
4587
      if src_node is None:
4588
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4589
        self.op.src_node = None
4590
        if os.path.isabs(src_path):
4591
          raise errors.OpPrereqError("Importing an instance from an absolute"
4592
                                     " path requires a source node option.")
4593
      else:
4594
        self.op.src_node = src_node = self._ExpandNode(src_node)
4595
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4596
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4597
        if not os.path.isabs(src_path):
4598
          self.op.src_path = src_path = \
4599
            os.path.join(constants.EXPORT_DIR, src_path)
4600

    
4601
    else: # INSTANCE_CREATE
4602
      if getattr(self.op, "os_type", None) is None:
4603
        raise errors.OpPrereqError("No guest OS specified")
4604

    
4605
  def _RunAllocator(self):
4606
    """Run the allocator based on input opcode.
4607

4608
    """
4609
    nics = [n.ToDict() for n in self.nics]
4610
    ial = IAllocator(self,
4611
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4612
                     name=self.op.instance_name,
4613
                     disk_template=self.op.disk_template,
4614
                     tags=[],
4615
                     os=self.op.os_type,
4616
                     vcpus=self.be_full[constants.BE_VCPUS],
4617
                     mem_size=self.be_full[constants.BE_MEMORY],
4618
                     disks=self.disks,
4619
                     nics=nics,
4620
                     hypervisor=self.op.hypervisor,
4621
                     )
4622

    
4623
    ial.Run(self.op.iallocator)
4624

    
4625
    if not ial.success:
4626
      raise errors.OpPrereqError("Can't compute nodes using"
4627
                                 " iallocator '%s': %s" % (self.op.iallocator,
4628
                                                           ial.info))
4629
    if len(ial.nodes) != ial.required_nodes:
4630
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4631
                                 " of nodes (%s), required %s" %
4632
                                 (self.op.iallocator, len(ial.nodes),
4633
                                  ial.required_nodes))
4634
    self.op.pnode = ial.nodes[0]
4635
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4636
                 self.op.instance_name, self.op.iallocator,
4637
                 ", ".join(ial.nodes))
4638
    if ial.required_nodes == 2:
4639
      self.op.snode = ial.nodes[1]
4640

    
4641
  def BuildHooksEnv(self):
4642
    """Build hooks env.
4643

4644
    This runs on master, primary and secondary nodes of the instance.
4645

4646
    """
4647
    env = {
4648
      "ADD_MODE": self.op.mode,
4649
      }
4650
    if self.op.mode == constants.INSTANCE_IMPORT:
4651
      env["SRC_NODE"] = self.op.src_node
4652
      env["SRC_PATH"] = self.op.src_path
4653
      env["SRC_IMAGES"] = self.src_images
4654

    
4655
    env.update(_BuildInstanceHookEnv(
4656
      name=self.op.instance_name,
4657
      primary_node=self.op.pnode,
4658
      secondary_nodes=self.secondaries,
4659
      status=self.op.start,
4660
      os_type=self.op.os_type,
4661
      memory=self.be_full[constants.BE_MEMORY],
4662
      vcpus=self.be_full[constants.BE_VCPUS],
4663
      nics=_PreBuildNICHooksList(self, self.nics),
4664
      disk_template=self.op.disk_template,
4665
      disks=[(d["size"], d["mode"]) for d in self.disks],
4666
    ))
4667

    
4668
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4669
          self.secondaries)
4670
    return env, nl, nl
4671

    
4672

    
4673
  def CheckPrereq(self):
4674
    """Check prerequisites.
4675

4676
    """
4677
    if (not self.cfg.GetVGName() and
4678
        self.op.disk_template not in constants.DTS_NOT_LVM):
4679
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4680
                                 " instances")
4681

    
4682
    if self.op.mode == constants.INSTANCE_IMPORT:
4683
      src_node = self.op.src_node
4684
      src_path = self.op.src_path
4685

    
4686
      if src_node is None:
4687
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
4688
        exp_list = self.rpc.call_export_list(locked_nodes)
4689
        found = False
4690
        for node in exp_list:
4691
          if exp_list[node].RemoteFailMsg():
4692
            continue
4693
          if src_path in exp_list[node].payload:
4694
            found = True
4695
            self.op.src_node = src_node = node
4696
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4697
                                                       src_path)
4698
            break
4699
        if not found:
4700
          raise errors.OpPrereqError("No export found for relative path %s" %
4701
                                      src_path)
4702

    
4703
      _CheckNodeOnline(self, src_node)
4704
      result = self.rpc.call_export_info(src_node, src_path)
4705
      msg = result.RemoteFailMsg()
4706
      if msg:
4707
        raise errors.OpPrereqError("No export or invalid export found in"
4708
                                   " dir %s: %s" % (src_path, msg))
4709

    
4710
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
4711
      if not export_info.has_section(constants.INISECT_EXP):
4712
        raise errors.ProgrammerError("Corrupted export config")
4713

    
4714
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4715
      if int(ei_version) != constants.EXPORT_VERSION:
4716
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4717
                                   (ei_version, constants.EXPORT_VERSION))
4718

    
4719
      # Check that the new instance doesn't have less disks than the export
4720
      instance_disks = len(self.disks)
4721
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4722
      if instance_disks < export_disks:
4723
        raise errors.OpPrereqError("Not enough disks to import."
4724
                                   " (instance: %d, export: %d)" %
4725
                                   (instance_disks, export_disks))
4726

    
4727
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4728
      disk_images = []
4729
      for idx in range(export_disks):
4730
        option = 'disk%d_dump' % idx
4731
        if export_info.has_option(constants.INISECT_INS, option):
4732
          # FIXME: are the old os-es, disk sizes, etc. useful?
4733
          export_name = export_info.get(constants.INISECT_INS, option)
4734
          image = os.path.join(src_path, export_name)
4735
          disk_images.append(image)
4736
        else:
4737
          disk_images.append(False)
4738

    
4739
      self.src_images = disk_images
4740

    
4741
      old_name = export_info.get(constants.INISECT_INS, 'name')
4742
      # FIXME: int() here could throw a ValueError on broken exports
4743
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4744
      if self.op.instance_name == old_name:
4745
        for idx, nic in enumerate(self.nics):
4746
          if nic.mac == constants.VALUE_AUTO and exp_nic_count > idx:
4747
            nic_mac_ini = 'nic%d_mac' % idx
4748
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4749

    
4750
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4751
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4752
    if self.op.start and not self.op.ip_check:
4753
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4754
                                 " adding an instance in start mode")
4755

    
4756
    if self.op.ip_check:
4757
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4758
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4759
                                   (self.check_ip, self.op.instance_name))
4760

    
4761
    #### mac address generation
4762
    # By generating here the mac address both the allocator and the hooks get
4763
    # the real final mac address rather than the 'auto' or 'generate' value.
4764
    # There is a race condition between the generation and the instance object
4765
    # creation, which means that we know the mac is valid now, but we're not
4766
    # sure it will be when we actually add the instance. If things go bad
4767
    # adding the instance will abort because of a duplicate mac, and the
4768
    # creation job will fail.
4769
    for nic in self.nics:
4770
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4771
        nic.mac = self.cfg.GenerateMAC()
4772

    
4773
    #### allocator run
4774

    
4775
    if self.op.iallocator is not None:
4776
      self._RunAllocator()
4777

    
4778
    #### node related checks
4779

    
4780
    # check primary node
4781
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4782
    assert self.pnode is not None, \
4783
      "Cannot retrieve locked node %s" % self.op.pnode
4784
    if pnode.offline:
4785
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4786
                                 pnode.name)
4787
    if pnode.drained:
4788
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4789
                                 pnode.name)
4790

    
4791
    self.secondaries = []
4792

    
4793
    # mirror node verification
4794
    if self.op.disk_template in constants.DTS_NET_MIRROR:
4795
      if self.op.snode is None:
4796
        raise errors.OpPrereqError("The networked disk templates need"
4797
                                   " a mirror node")
4798
      if self.op.snode == pnode.name:
4799
        raise errors.OpPrereqError("The secondary node cannot be"
4800
                                   " the primary node.")
4801
      _CheckNodeOnline(self, self.op.snode)
4802
      _CheckNodeNotDrained(self, self.op.snode)
4803
      self.secondaries.append(self.op.snode)
4804

    
4805
    nodenames = [pnode.name] + self.secondaries
4806

    
4807
    req_size = _ComputeDiskSize(self.op.disk_template,
4808
                                self.disks)
4809

    
4810
    # Check lv size requirements
4811
    if req_size is not None:
4812
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4813
                                         self.op.hypervisor)
4814
      for node in nodenames:
4815
        info = nodeinfo[node]
4816
        info.Raise()
4817
        info = info.data
4818
        if not info:
4819
          raise errors.OpPrereqError("Cannot get current information"
4820
                                     " from node '%s'" % node)
4821
        vg_free = info.get('vg_free', None)
4822
        if not isinstance(vg_free, int):
4823
          raise errors.OpPrereqError("Can't compute free disk space on"
4824
                                     " node %s" % node)
4825
        if req_size > info['vg_free']:
4826
          raise errors.OpPrereqError("Not enough disk space on target node %s."
4827
                                     " %d MB available, %d MB required" %
4828
                                     (node, info['vg_free'], req_size))
4829

    
4830
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4831

    
4832
    # os verification
4833
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4834
    result.Raise()
4835
    if not isinstance(result.data, objects.OS):
4836
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
4837
                                 " primary node"  % self.op.os_type)
4838

    
4839
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
4840

    
4841
    # memory check on primary node
4842
    if self.op.start:
4843
      _CheckNodeFreeMemory(self, self.pnode.name,
4844
                           "creating instance %s" % self.op.instance_name,
4845
                           self.be_full[constants.BE_MEMORY],
4846
                           self.op.hypervisor)
4847

    
4848
  def Exec(self, feedback_fn):
4849
    """Create and add the instance to the cluster.
4850

4851
    """
4852
    instance = self.op.instance_name
4853
    pnode_name = self.pnode.name
4854

    
4855
    ht_kind = self.op.hypervisor
4856
    if ht_kind in constants.HTS_REQ_PORT:
4857
      network_port = self.cfg.AllocatePort()
4858
    else:
4859
      network_port = None
4860

    
4861
    ##if self.op.vnc_bind_address is None:
4862
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
4863

    
4864
    # this is needed because os.path.join does not accept None arguments
4865
    if self.op.file_storage_dir is None:
4866
      string_file_storage_dir = ""
4867
    else:
4868
      string_file_storage_dir = self.op.file_storage_dir
4869

    
4870
    # build the full file storage dir path
4871
    file_storage_dir = os.path.normpath(os.path.join(
4872
                                        self.cfg.GetFileStorageDir(),
4873
                                        string_file_storage_dir, instance))
4874
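    # Example (added comment; the cluster storage dir is an assumption):
    # with GetFileStorageDir() == "/srv/ganeti/file-storage",
    # file_storage_dir == "mydir" and instance "inst1.example.com" this
    # yields "/srv/ganeti/file-storage/mydir/inst1.example.com"; the file
    # disks are then named disk0, disk1, ... inside that directory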

    
4875

    
4876
    disks = _GenerateDiskTemplate(self,
4877
                                  self.op.disk_template,
4878
                                  instance, pnode_name,
4879
                                  self.secondaries,
4880
                                  self.disks,
4881
                                  file_storage_dir,
4882
                                  self.op.file_driver,
4883
                                  0)
4884

    
4885
    iobj = objects.Instance(name=instance, os=self.op.os_type,
4886
                            primary_node=pnode_name,
4887
                            nics=self.nics, disks=disks,
4888
                            disk_template=self.op.disk_template,
4889
                            admin_up=False,
4890
                            network_port=network_port,
4891
                            beparams=self.op.beparams,
4892
                            hvparams=self.op.hvparams,
4893
                            hypervisor=self.op.hypervisor,
4894
                            )
4895

    
4896
    feedback_fn("* creating instance disks...")
4897
    try:
4898
      _CreateDisks(self, iobj)
4899
    except errors.OpExecError:
4900
      self.LogWarning("Device creation failed, reverting...")
4901
      try:
4902
        _RemoveDisks(self, iobj)
4903
      finally:
4904
        self.cfg.ReleaseDRBDMinors(instance)
4905
        raise
4906

    
4907
    feedback_fn("adding instance %s to cluster config" % instance)
4908

    
4909
    self.cfg.AddInstance(iobj)
4910
    # Declare that we don't want to remove the instance lock anymore, as we've
4911
    # added the instance to the config
4912
    del self.remove_locks[locking.LEVEL_INSTANCE]
4913
    # Unlock all the nodes
4914
    if self.op.mode == constants.INSTANCE_IMPORT:
4915
      nodes_keep = [self.op.src_node]
4916
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
4917
                       if node != self.op.src_node]
4918
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
4919
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
4920
    else:
4921
      self.context.glm.release(locking.LEVEL_NODE)
4922
      del self.acquired_locks[locking.LEVEL_NODE]
4923

    
4924
    if self.op.wait_for_sync:
4925
      disk_abort = not _WaitForSync(self, iobj)
4926
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
4927
      # make sure the disks are not degraded (still sync-ing is ok)
4928
      time.sleep(15)
4929
      feedback_fn("* checking mirrors status")
4930
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
4931
    else:
4932
      disk_abort = False
4933

    
4934
    if disk_abort:
4935
      _RemoveDisks(self, iobj)
4936
      self.cfg.RemoveInstance(iobj.name)
4937
      # Make sure the instance lock gets removed
4938
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
4939
      raise errors.OpExecError("There are some degraded disks for"
4940
                               " this instance")
4941

    
4942
    feedback_fn("creating os for instance %s on node %s" %
4943
                (instance, pnode_name))
4944

    
4945
    if iobj.disk_template != constants.DT_DISKLESS:
4946
      if self.op.mode == constants.INSTANCE_CREATE:
4947
        feedback_fn("* running the instance OS create scripts...")
4948
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
4949
        msg = result.RemoteFailMsg()
4950
        if msg:
4951
          raise errors.OpExecError("Could not add os for instance %s"
4952
                                   " on node %s: %s" %
4953
                                   (instance, pnode_name, msg))
4954

    
4955
      elif self.op.mode == constants.INSTANCE_IMPORT:
4956
        feedback_fn("* running the instance OS import scripts...")
4957
        src_node = self.op.src_node
4958
        src_images = self.src_images
4959
        cluster_name = self.cfg.GetClusterName()
4960
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
4961
                                                         src_node, src_images,
4962
                                                         cluster_name)
4963
        msg = import_result.RemoteFailMsg()
4964
        if msg:
4965
          self.LogWarning("Error while importing the disk images for instance"
4966
                          " %s on node %s: %s" % (instance, pnode_name, msg))
4967
      else:
4968
        # also checked in the prereq part
4969
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
4970
                                     % self.op.mode)
4971

    
4972
    if self.op.start:
4973
      iobj.admin_up = True
4974
      self.cfg.Update(iobj)
4975
      logging.info("Starting instance %s on node %s", instance, pnode_name)
4976
      feedback_fn("* starting instance...")
4977
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
4978
      msg = result.RemoteFailMsg()
4979
      if msg:
4980
        raise errors.OpExecError("Could not start instance: %s" % msg)
4981

    
4982

    
4983
class LUConnectConsole(NoHooksLU):
4984
  """Connect to an instance's console.
4985

4986
  This is somewhat special in that it returns the command line that
4987
  you need to run on the master node in order to connect to the
4988
  console.
4989

4990
  """
4991
  _OP_REQP = ["instance_name"]
4992
  REQ_BGL = False
4993

    
4994
  def ExpandNames(self):
4995
    self._ExpandAndLockInstance()
4996

    
4997
  def CheckPrereq(self):
4998
    """Check prerequisites.
4999

5000
    This checks that the instance is in the cluster.
5001

5002
    """
5003
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5004
    assert self.instance is not None, \
5005
      "Cannot retrieve locked instance %s" % self.op.instance_name
5006
    _CheckNodeOnline(self, self.instance.primary_node)
5007

    
5008
  def Exec(self, feedback_fn):
5009
    """Connect to the console of an instance
5010

5011
    """
5012
    instance = self.instance
5013
    node = instance.primary_node
5014

    
5015
    node_insts = self.rpc.call_instance_list([node],
5016
                                             [instance.hypervisor])[node]
5017
    msg = node_insts.RemoteFailMsg()
5018
    if msg:
5019
      raise errors.OpExecError("Can't get node information from %s: %s" %
5020
                               (node, msg))
5021

    
5022
    if instance.name not in node_insts.payload:
5023
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5024

    
5025
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5026

    
5027
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5028
    cluster = self.cfg.GetClusterInfo()
5029
    # beparams and hvparams are passed separately, to avoid editing the
5030
    # instance and then saving the defaults in the instance itself.
5031
    hvparams = cluster.FillHV(instance)
5032
    beparams = cluster.FillBE(instance)
5033
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5034

    
5035
    # build ssh cmdline
5036
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
5037

    
5038

    
5039
class LUReplaceDisks(LogicalUnit):
5040
  """Replace the disks of an instance.
5041

5042
  """
5043
  HPATH = "mirrors-replace"
5044
  HTYPE = constants.HTYPE_INSTANCE
5045
  _OP_REQP = ["instance_name", "mode", "disks"]
5046
  REQ_BGL = False
5047

    
5048
  def CheckArguments(self):
5049
    if not hasattr(self.op, "remote_node"):
5050
      self.op.remote_node = None
5051
    if not hasattr(self.op, "iallocator"):
5052
      self.op.iallocator = None
5053

    
5054
    # check for valid parameter combination
5055
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
5056
    if self.op.mode == constants.REPLACE_DISK_CHG:
5057
      if cnt == 2:
5058
        raise errors.OpPrereqError("When changing the secondary either an"
5059
                                   " iallocator script must be used or the"
5060
                                   " new node given")
5061
      elif cnt == 0:
5062
        raise errors.OpPrereqError("Give either the iallocator or the new"
5063
                                   " secondary, not both")
5064
    else: # not replacing the secondary
5065
      if cnt != 2:
5066
        raise errors.OpPrereqError("The iallocator and new node options can"
5067
                                   " be used only when changing the"
5068
                                   " secondary node")
5069

    
5070
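  # Summary of the rules enforced by CheckArguments above (added comment):
  #
  #   mode == REPLACE_DISK_CHG: exactly one of remote_node / iallocator
  #   any other replace mode:   neither remote_node nor iallocator
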
  def ExpandNames(self):
5071
    self._ExpandAndLockInstance()
5072

    
5073
    if self.op.iallocator is not None:
5074
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5075
    elif self.op.remote_node is not None:
5076
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5077
      if remote_node is None:
5078
        raise errors.OpPrereqError("Node '%s' not known" %
5079
                                   self.op.remote_node)
5080
      self.op.remote_node = remote_node
5081
      # Warning: do not remove the locking of the new secondary here
5082
      # unless DRBD8.AddChildren is changed to work in parallel;
5083
      # currently it doesn't since parallel invocations of
5084
      # FindUnusedMinor will conflict
5085
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5086
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5087
    else:
5088
      self.needed_locks[locking.LEVEL_NODE] = []
5089
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5090

    
5091
  def DeclareLocks(self, level):
5092
    # If we're not already locking all nodes in the set we have to declare the
5093
    # instance's primary/secondary nodes.
5094
    if (level == locking.LEVEL_NODE and
5095
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5096
      self._LockInstancesNodes()
5097

    
5098
  def _RunAllocator(self):
5099
    """Compute a new secondary node using an IAllocator.
5100

5101
    """
5102
    ial = IAllocator(self,
5103
                     mode=constants.IALLOCATOR_MODE_RELOC,
5104
                     name=self.op.instance_name,
5105
                     relocate_from=[self.sec_node])
5106

    
5107
    ial.Run(self.op.iallocator)
5108

    
5109
    if not ial.success:
5110
      raise errors.OpPrereqError("Can't compute nodes using"
5111
                                 " iallocator '%s': %s" % (self.op.iallocator,
5112
                                                           ial.info))
5113
    if len(ial.nodes) != ial.required_nodes:
5114
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5115
                                 " of nodes (%s), required %s" %
5116
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
5117
    self.op.remote_node = ial.nodes[0]
5118
    self.LogInfo("Selected new secondary for the instance: %s",
5119
                 self.op.remote_node)
5120

    
5121
  def BuildHooksEnv(self):
5122
    """Build hooks env.
5123

5124
    This runs on the master, the primary and all the secondaries.
5125

5126
    """
5127
    env = {
5128
      "MODE": self.op.mode,
5129
      "NEW_SECONDARY": self.op.remote_node,
5130
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
5131
      }
5132
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5133
    nl = [
5134
      self.cfg.GetMasterNode(),
5135
      self.instance.primary_node,
5136
      ]
5137
    if self.op.remote_node is not None:
5138
      nl.append(self.op.remote_node)
5139
    return env, nl, nl
5140

    
5141
  def CheckPrereq(self):
5142
    """Check prerequisites.
5143

5144
    This checks that the instance is in the cluster.
5145

5146
    """
5147
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5148
    assert instance is not None, \
5149
      "Cannot retrieve locked instance %s" % self.op.instance_name
5150
    self.instance = instance
5151

    
5152
    if instance.disk_template != constants.DT_DRBD8:
5153
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5154
                                 " instances")
5155

    
5156
    if len(instance.secondary_nodes) != 1:
5157
      raise errors.OpPrereqError("The instance has a strange layout,"
5158
                                 " expected one secondary but found %d" %
5159
                                 len(instance.secondary_nodes))
5160

    
5161
    self.sec_node = instance.secondary_nodes[0]
5162

    
5163
    if self.op.iallocator is not None:
5164
      self._RunAllocator()
5165

    
5166
    remote_node = self.op.remote_node
5167
    if remote_node is not None:
5168
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5169
      assert self.remote_node_info is not None, \
5170
        "Cannot retrieve locked node %s" % remote_node
5171
    else:
5172
      self.remote_node_info = None
5173
    if remote_node == instance.primary_node:
5174
      raise errors.OpPrereqError("The specified node is the primary node of"
5175
                                 " the instance.")
5176
    elif remote_node == self.sec_node:
5177
      raise errors.OpPrereqError("The specified node is already the"
5178
                                 " secondary node of the instance.")
5179

    
5180
    if self.op.mode == constants.REPLACE_DISK_PRI:
5181
      n1 = self.tgt_node = instance.primary_node
5182
      n2 = self.oth_node = self.sec_node
5183
    elif self.op.mode == constants.REPLACE_DISK_SEC:
5184
      n1 = self.tgt_node = self.sec_node
5185
      n2 = self.oth_node = instance.primary_node
5186
    elif self.op.mode == constants.REPLACE_DISK_CHG:
5187
      n1 = self.new_node = remote_node
5188
      n2 = self.oth_node = instance.primary_node
5189
      self.tgt_node = self.sec_node
5190
      _CheckNodeNotDrained(self, remote_node)
5191
    else:
5192
      raise errors.ProgrammerError("Unhandled disk replace mode")
5193

    
5194
    _CheckNodeOnline(self, n1)
5195
    _CheckNodeOnline(self, n2)
5196

    
5197
    if not self.op.disks:
5198
      self.op.disks = range(len(instance.disks))
5199

    
5200
    for disk_idx in self.op.disks:
5201
      instance.FindDisk(disk_idx)
5202

    
5203
  def _ExecD8DiskOnly(self, feedback_fn):
5204
    """Replace a disk on the primary or secondary for dbrd8.
5205

5206
    The algorithm for replace is quite complicated:
5207

5208
      1. for each disk to be replaced:
5209

5210
        1. create new LVs on the target node with unique names
5211
        1. detach old LVs from the drbd device
5212
        1. rename old LVs to name_replaced.<time_t>
5213
        1. rename new LVs to old LVs
5214
        1. attach the new LVs (with the old names now) to the drbd device
5215

5216
      1. wait for sync across all devices
5217

5218
      1. for each modified disk:
5219

5220
        1. remove old LVs (which have the name name_replaced.<time_t>)
5221

5222
    Failures are not very well handled.
5223

5224
    """
5225
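    # Illustrative view of the rename dance below (added comment, LV names
    # are made up): a fresh pair of LVs is created under new unique names,
    # the old data LV "xenvg/aaa.disk0_data" is renamed to
    # "xenvg/aaa.disk0_data_replaced-<time_t>", the new LV is renamed to
    # "xenvg/aaa.disk0_data" and attached to the DRBD device, so the drbd
    # keeps using the original LV names throughout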
    steps_total = 6
5226
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5227
    instance = self.instance
5228
    iv_names = {}
5229
    vgname = self.cfg.GetVGName()
5230
    # start of work
5231
    cfg = self.cfg
5232
    tgt_node = self.tgt_node
5233
    oth_node = self.oth_node
5234

    
5235
    # Step: check device activation
5236
    self.proc.LogStep(1, steps_total, "check device existence")
5237
    info("checking volume groups")
5238
    my_vg = cfg.GetVGName()
5239
    results = self.rpc.call_vg_list([oth_node, tgt_node])
5240
    if not results:
5241
      raise errors.OpExecError("Can't list volume groups on the nodes")
5242
    for node in oth_node, tgt_node:
5243
      res = results[node]
5244
      msg = res.RemoteFailMsg()
5245
      if msg:
5246
        raise errors.OpExecError("Error checking node %s: %s" % (node, msg))
5247
      if my_vg not in res.payload:
5248
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5249
                                 (my_vg, node))
5250
    for idx, dev in enumerate(instance.disks):
5251
      if idx not in self.op.disks:
5252
        continue
5253
      for node in tgt_node, oth_node:
5254
        info("checking disk/%d on %s" % (idx, node))
5255
        cfg.SetDiskID(dev, node)
5256
        result = self.rpc.call_blockdev_find(node, dev)
5257
        msg = result.RemoteFailMsg()
5258
        if not msg and not result.payload:
5259
          msg = "disk not found"
5260
        if msg:
5261
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5262
                                   (idx, node, msg))
5263

    
5264
    # Step: check other node consistency
5265
    self.proc.LogStep(2, steps_total, "check peer consistency")
5266
    for idx, dev in enumerate(instance.disks):
5267
      if idx not in self.op.disks:
5268
        continue
5269
      info("checking disk/%d consistency on %s" % (idx, oth_node))
5270
      if not _CheckDiskConsistency(self, dev, oth_node,
5271
                                   oth_node == instance.primary_node):
5272
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
5273
                                 " to replace disks on this node (%s)" %
5274
                                 (oth_node, tgt_node))
5275

    
5276
    # Step: create new storage
5277
    self.proc.LogStep(3, steps_total, "allocate new storage")
5278
    for idx, dev in enumerate(instance.disks):
5279
      if idx not in self.op.disks:
5280
        continue
5281
      size = dev.size
5282
      cfg.SetDiskID(dev, tgt_node)
5283
      lv_names = [".disk%d_%s" % (idx, suf)
5284
                  for suf in ["data", "meta"]]
5285
      names = _GenerateUniqueNames(self, lv_names)
5286
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5287
                             logical_id=(vgname, names[0]))
5288
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5289
                             logical_id=(vgname, names[1]))
5290
      new_lvs = [lv_data, lv_meta]
5291
      old_lvs = dev.children
5292
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5293
      info("creating new local storage on %s for %s" %
5294
           (tgt_node, dev.iv_name))
5295
      # we pass force_create=True to force the LVM creation
5296
      for new_lv in new_lvs:
5297
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
5298
                        _GetInstanceInfoText(instance), False)
5299

    
5300
    # Step: for each lv, detach+rename*2+attach
5301
    self.proc.LogStep(4, steps_total, "change drbd configuration")
5302
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5303
      info("detaching %s drbd from local storage" % dev.iv_name)
5304
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
5305
      msg = result.RemoteFailMsg()
5306
      if msg:
5307
        raise errors.OpExecError("Can't detach drbd from local storage on node"
5308
                                 " %s for device %s: %s" %
5309
                                 (tgt_node, dev.iv_name, msg))
5310
      #dev.children = []
5311
      #cfg.Update(instance)
5312

    
5313
      # ok, we created the new LVs, so now we know we have the needed
5314
      # storage; as such, we proceed on the target node to rename
5315
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5316
      # using the assumption that logical_id == physical_id (which in
5317
      # turn is the unique_id on that node)
5318

    
5319
      # FIXME(iustin): use a better name for the replaced LVs
5320
      temp_suffix = int(time.time())
5321
      ren_fn = lambda d, suff: (d.physical_id[0],
5322
                                d.physical_id[1] + "_replaced-%s" % suff)
5323
      # build the rename list based on what LVs exist on the node
5324
      rlist = []
5325
      for to_ren in old_lvs:
5326
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
5327
        if not result.RemoteFailMsg() and result.payload:
5328
          # device exists
5329
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
5330

    
5331
      info("renaming the old LVs on the target node")
5332
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5333
      msg = result.RemoteFailMsg()
5334
      if msg:
5335
        raise errors.OpExecError("Can't rename old LVs on node %s: %s" %
5336
                                 (tgt_node, msg))
5337
      # now we rename the new LVs to the old LVs
5338
      info("renaming the new LVs on the target node")
5339
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
5340
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5341
      msg = result.RemoteFailMsg()
5342
      if msg:
5343
        raise errors.OpExecError("Can't rename new LVs on node %s: %s" %
5344
                                 (tgt_node, msg))
5345

    
5346
      for old, new in zip(old_lvs, new_lvs):
5347
        new.logical_id = old.logical_id
5348
        cfg.SetDiskID(new, tgt_node)
5349

    
5350
      for disk in old_lvs:
5351
        disk.logical_id = ren_fn(disk, temp_suffix)
5352
        cfg.SetDiskID(disk, tgt_node)
5353

    
5354
      # now that the new lvs have the old name, we can add them to the device
5355
      info("adding new mirror component on %s" % tgt_node)
5356
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
5357
      msg = result.RemoteFailMsg()
5358
      if msg:
5359
        for new_lv in new_lvs:
5360
          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
5361
          if msg:
5362
            warning("Can't rollback device %s: %s", dev, msg,
5363
                    hint="cleanup manually the unused logical volumes")
5364
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5365

    
5366
      dev.children = new_lvs
5367
      cfg.Update(instance)
5368

    
5369
    # Step: wait for sync
5370

    
5371
    # this can fail as the old devices are degraded and _WaitForSync
5372
    # does a combined result over all disks, so we don't check its
5373
    # return value
5374
    self.proc.LogStep(5, steps_total, "sync devices")
5375
    _WaitForSync(self, instance, unlock=True)
5376

    
5377
    # so check manually all the devices
5378
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5379
      cfg.SetDiskID(dev, instance.primary_node)
5380
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
5381
      msg = result.RemoteFailMsg()
5382
      if not msg and not result.payload:
5383
        msg = "disk not found"
5384
      if msg:
5385
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5386
                                 (name, msg))
5387
      if result.payload[5]:
5388
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5389

    
5390
    # Step: remove old storage
5391
    self.proc.LogStep(6, steps_total, "removing old storage")
5392
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5393
      info("remove logical volumes for %s" % name)
5394
      for lv in old_lvs:
5395
        cfg.SetDiskID(lv, tgt_node)
5396
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
5397
        if msg:
5398
          warning("Can't remove old LV: %s" % msg,
5399
                  hint="manually remove unused LVs")
5400
          continue
5401

    
5402
  def _ExecD8Secondary(self, feedback_fn):
5403
    """Replace the secondary node for drbd8.
5404

5405
    The algorithm for replace is quite complicated:
5406
      - for all disks of the instance:
5407
        - create new LVs on the new node with same names
5408
        - shutdown the drbd device on the old secondary
5409
        - disconnect the drbd network on the primary
5410
        - create the drbd device on the new secondary
5411
        - network attach the drbd on the primary, using an artifice:
5412
          the drbd code for Attach() will connect to the network if it
5413
          finds a device which is connected to the good local disks but
5414
          not network enabled
5415
      - wait for sync across all devices
5416
      - remove all disks from the old secondary
5417

5418
    Failures are not very well handled.
5419

5420
    """
5421
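    # Illustrative sketch of the logical_id changes below (added comment,
    # values are examples): a disk currently known as
    #   (pri_node, old_node, 11000, p_minor, old_minor, secret)
    # is first created on new_node with the port-less id
    #   (pri_node, new_node, None, p_minor, new_minor, secret)
    # so it comes up without networking; only after the primary has been
    # disconnected from the old secondary is the full id (with the original
    # port) written to the configuration and the network re-attached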
    steps_total = 6
5422
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5423
    instance = self.instance
5424
    iv_names = {}
5425
    # start of work
5426
    cfg = self.cfg
5427
    old_node = self.tgt_node
5428
    new_node = self.new_node
5429
    pri_node = instance.primary_node
5430
    nodes_ip = {
5431
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
5432
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
5433
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
5434
      }
5435

    
5436
    # Step: check device activation
5437
    self.proc.LogStep(1, steps_total, "check device existence")
5438
    info("checking volume groups")
5439
    my_vg = cfg.GetVGName()
5440
    results = self.rpc.call_vg_list([pri_node, new_node])
5441
    for node in pri_node, new_node:
5442
      res = results[node]
5443
      msg = res.RemoteFailMsg()
5444
      if msg:
5445
        raise errors.OpExecError("Error checking node %s: %s" % (node, msg))
5446
      if my_vg not in res.payload:
5447
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5448
                                 (my_vg, node))
5449
    for idx, dev in enumerate(instance.disks):
5450
      if idx not in self.op.disks:
5451
        continue
5452
      info("checking disk/%d on %s" % (idx, pri_node))
5453
      cfg.SetDiskID(dev, pri_node)
5454
      result = self.rpc.call_blockdev_find(pri_node, dev)
5455
      msg = result.RemoteFailMsg()
5456
      if not msg and not result.payload:
5457
        msg = "disk not found"
5458
      if msg:
5459
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5460
                                 (idx, pri_node, msg))
5461

    
5462
    # Step: check other node consistency
5463
    self.proc.LogStep(2, steps_total, "check peer consistency")
5464
    for idx, dev in enumerate(instance.disks):
5465
      if idx not in self.op.disks:
5466
        continue
5467
      info("checking disk/%d consistency on %s" % (idx, pri_node))
5468
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
5469
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
5470
                                 " unsafe to replace the secondary" %
5471
                                 pri_node)
5472

    
5473
    # Step: create new storage
5474
    self.proc.LogStep(3, steps_total, "allocate new storage")
5475
    for idx, dev in enumerate(instance.disks):
5476
      info("adding new local storage on %s for disk/%d" %
5477
           (new_node, idx))
5478
      # we pass force_create=True to force LVM creation
5479
      for new_lv in dev.children:
5480
        _CreateBlockDev(self, new_node, instance, new_lv, True,
5481
                        _GetInstanceInfoText(instance), False)
5482

    
5483
    # Step 4: drbd minors and drbd setup changes
5484
    # after this, we must manually remove the drbd minors on both the
5485
    # error and the success paths
5486
    minors = cfg.AllocateDRBDMinor([new_node for _ in instance.disks],
5487
                                   instance.name)
5488
    logging.debug("Allocated minors %s" % (minors,))
5489
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
5490
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
5491
      size = dev.size
5492
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
5493
      # create new devices on new_node; note that we create two IDs:
5494
      # one without port, so the drbd will be activated without
5495
      # networking information on the new node at this stage, and one
5496
      # with network, for the later activation in step 4
5497
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5498
      if pri_node == o_node1:
5499
        p_minor = o_minor1
5500
      else:
5501
        p_minor = o_minor2
5502

    
5503
      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
5504
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
5505

    
5506
      iv_names[idx] = (dev, dev.children, new_net_id)
5507
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5508
                    new_net_id)
5509
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5510
                              logical_id=new_alone_id,
5511
                              children=dev.children)
5512
      try:
5513
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
5514
                              _GetInstanceInfoText(instance), False)
5515
      except errors.GenericError:
5516
        self.cfg.ReleaseDRBDMinors(instance.name)
5517
        raise
5518

    
5519
    for idx, dev in enumerate(instance.disks):
5520
      # we have new devices, shutdown the drbd on the old secondary
5521
      info("shutting down drbd for disk/%d on old node" % idx)
5522
      cfg.SetDiskID(dev, old_node)
5523
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
5524
      if msg:
5525
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
5526
                (idx, msg),
5527
                hint="Please cleanup this device manually as soon as possible")
5528

    
5529
    info("detaching primary drbds from the network (=> standalone)")
5530
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
5531
                                               instance.disks)[pri_node]
5532

    
5533
    msg = result.RemoteFailMsg()
5534
    if msg:
5535
      # detaches didn't succeed (unlikely)
5536
      self.cfg.ReleaseDRBDMinors(instance.name)
5537
      raise errors.OpExecError("Can't detach the disks from the network on"
5538
                               " old node: %s" % (msg,))
5539

    
5540
    # if we managed to detach at least one, we update all the disks of
5541
    # the instance to point to the new secondary
5542
    info("updating instance configuration")
5543
    for dev, _, new_logical_id in iv_names.itervalues():
5544
      dev.logical_id = new_logical_id
5545
      cfg.SetDiskID(dev, pri_node)
5546
    cfg.Update(instance)
5547

    
5548
    # and now perform the drbd attach
5549
    info("attaching primary drbds to new secondary (standalone => connected)")
5550
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
5551
                                           instance.disks, instance.name,
5552
                                           False)
5553
    for to_node, to_result in result.items():
5554
      msg = to_result.RemoteFailMsg()
5555
      if msg:
5556
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
5557
                hint="please do a gnt-instance info to see the"
5558
                " status of disks")
5559

    
5560
    # this can fail as the old devices are degraded and _WaitForSync
5561
    # does a combined result over all disks, so we don't check its
5562
    # return value
5563
    self.proc.LogStep(5, steps_total, "sync devices")
5564
    _WaitForSync(self, instance, unlock=True)
5565

    
5566
    # so check manually all the devices
5567
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5568
      cfg.SetDiskID(dev, pri_node)
5569
      result = self.rpc.call_blockdev_find(pri_node, dev)
5570
      msg = result.RemoteFailMsg()
5571
      if not msg and not result.payload:
5572
        msg = "disk not found"
5573
      if msg:
5574
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
5575
                                 (idx, msg))
5576
      if result.payload[5]:
5577
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
5578

    
5579
    self.proc.LogStep(6, steps_total, "removing old storage")
5580
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5581
      info("remove logical volumes for disk/%d" % idx)
5582
      for lv in old_lvs:
5583
        cfg.SetDiskID(lv, old_node)
5584
        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
5585
        if msg:
5586
          warning("Can't remove LV on old secondary: %s", msg,
5587
                  hint="Cleanup stale volumes by hand")
5588

    
5589
  def Exec(self, feedback_fn):
5590
    """Execute disk replacement.
5591

5592
    This dispatches the disk replacement to the appropriate handler.
5593

5594
    """
5595
    instance = self.instance
5596

    
5597
    # Activate the instance disks if we're replacing them on a down instance
5598
    if not instance.admin_up:
5599
      _StartInstanceDisks(self, instance, True)
5600

    
5601
    if self.op.mode == constants.REPLACE_DISK_CHG:
5602
      fn = self._ExecD8Secondary
5603
    else:
5604
      fn = self._ExecD8DiskOnly
5605

    
5606
    ret = fn(feedback_fn)
5607

    
5608
    # Deactivate the instance disks if we're replacing them on a down instance
5609
    if not instance.admin_up:
5610
      _SafeShutdownInstanceDisks(self, instance)
5611

    
5612
    return ret
5613

    
5614

    
5615
class LUGrowDisk(LogicalUnit):
5616
  """Grow a disk of an instance.
5617

5618
  """
5619
  HPATH = "disk-grow"
5620
  HTYPE = constants.HTYPE_INSTANCE
5621
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5622
  REQ_BGL = False
5623

    
5624
  def ExpandNames(self):
5625
    self._ExpandAndLockInstance()
5626
    self.needed_locks[locking.LEVEL_NODE] = []
5627
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5628

    
5629
  def DeclareLocks(self, level):
5630
    if level == locking.LEVEL_NODE:
5631
      self._LockInstancesNodes()
5632

    
5633
  def BuildHooksEnv(self):
5634
    """Build hooks env.
5635

5636
    This runs on the master, the primary and all the secondaries.
5637

5638
    """
5639
    env = {
5640
      "DISK": self.op.disk,
5641
      "AMOUNT": self.op.amount,
5642
      }
5643
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5644
    nl = [
5645
      self.cfg.GetMasterNode(),
5646
      self.instance.primary_node,
5647
      ]
5648
    return env, nl, nl
5649

    
5650
  def CheckPrereq(self):
5651
    """Check prerequisites.
5652

5653
    This checks that the instance is in the cluster.
5654

5655
    """
5656
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5657
    assert instance is not None, \
5658
      "Cannot retrieve locked instance %s" % self.op.instance_name
5659
    nodenames = list(instance.all_nodes)
5660
    for node in nodenames:
5661
      _CheckNodeOnline(self, node)
5662

    
5663

    
5664
    self.instance = instance
5665

    
5666
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5667
      raise errors.OpPrereqError("Instance's disk layout does not support"
5668
                                 " growing.")
5669

    
5670
    self.disk = instance.FindDisk(self.op.disk)
5671

    
5672
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5673
                                       instance.hypervisor)
5674
    for node in nodenames:
5675
      info = nodeinfo[node]
5676
      if info.failed or not info.data:
5677
        raise errors.OpPrereqError("Cannot get current information"
5678
                                   " from node '%s'" % node)
5679
      vg_free = info.data.get('vg_free', None)
5680
      if not isinstance(vg_free, int):
5681
        raise errors.OpPrereqError("Can't compute free disk space on"
5682
                                   " node %s" % node)
5683
      if self.op.amount > vg_free:
5684
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5685
                                   " %d MiB available, %d MiB required" %
5686
                                   (node, vg_free, self.op.amount))
5687

    
5688
  def Exec(self, feedback_fn):
5689
    """Execute disk grow.
5690

5691
    """
5692
    instance = self.instance
5693
    disk = self.disk
5694
    for node in instance.all_nodes:
5695
      self.cfg.SetDiskID(disk, node)
5696
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5697
      msg = result.RemoteFailMsg()
5698
      if msg:
5699
        raise errors.OpExecError("Grow request failed to node %s: %s" %
5700
                                 (node, msg))
5701
    disk.RecordGrow(self.op.amount)
5702
    self.cfg.Update(instance)
5703
    if self.op.wait_for_sync:
5704
      disk_abort = not _WaitForSync(self, instance)
5705
      if disk_abort:
5706
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5707
                             " status.\nPlease check the instance.")
5708

    
5709

    
5710
class LUQueryInstanceData(NoHooksLU):
5711
  """Query runtime instance data.
5712

5713
  """
5714
  _OP_REQP = ["instances", "static"]
5715
  REQ_BGL = False
5716

    
5717
  def ExpandNames(self):
5718
    self.needed_locks = {}
5719
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
5720

    
5721
    if not isinstance(self.op.instances, list):
5722
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5723

    
5724
    if self.op.instances:
5725
      self.wanted_names = []
5726
      for name in self.op.instances:
5727
        full_name = self.cfg.ExpandInstanceName(name)
5728
        if full_name is None:
5729
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5730
        self.wanted_names.append(full_name)
5731
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5732
    else:
5733
      self.wanted_names = None
5734
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5735

    
5736
    self.needed_locks[locking.LEVEL_NODE] = []
5737
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5738

    
5739
  def DeclareLocks(self, level):
5740
    if level == locking.LEVEL_NODE:
5741
      self._LockInstancesNodes()
5742

    
5743
  def CheckPrereq(self):
5744
    """Check prerequisites.
5745

5746
    This only checks the optional instance list against the existing names.
5747

5748
    """
5749
    if self.wanted_names is None:
5750
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5751

    
5752
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5753
                             in self.wanted_names]
5754
    return
5755

    
5756
  def _ComputeDiskStatus(self, instance, snode, dev):
5757
    """Compute block device status.
5758

5759
    """
5760
    static = self.op.static
5761
    if not static:
5762
      self.cfg.SetDiskID(dev, instance.primary_node)
5763
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5764
      if dev_pstatus.offline:
5765
        dev_pstatus = None
5766
      else:
5767
        msg = dev_pstatus.RemoteFailMsg()
5768
        if msg:
5769
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5770
                                   (instance.name, msg))
5771
        dev_pstatus = dev_pstatus.payload
5772
    else:
5773
      dev_pstatus = None
5774

    
5775
    if dev.dev_type in constants.LDS_DRBD:
5776
      # we change the snode then (otherwise we use the one passed in)
5777
      if dev.logical_id[0] == instance.primary_node:
5778
        snode = dev.logical_id[1]
5779
      else:
5780
        snode = dev.logical_id[0]
5781

    
5782
    if snode and not static:
5783
      self.cfg.SetDiskID(dev, snode)
5784
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5785
      if dev_sstatus.offline:
5786
        dev_sstatus = None
5787
      else:
5788
        msg = dev_sstatus.RemoteFailMsg()
5789
        if msg:
5790
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5791
                                   (instance.name, msg))
5792
        dev_sstatus = dev_sstatus.payload
5793
    else:
5794
      dev_sstatus = None
5795

    
5796
    if dev.children:
5797
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5798
                      for child in dev.children]
5799
    else:
5800
      dev_children = []
5801

    
5802
    data = {
5803
      "iv_name": dev.iv_name,
5804
      "dev_type": dev.dev_type,
5805
      "logical_id": dev.logical_id,
5806
      "physical_id": dev.physical_id,
5807
      "pstatus": dev_pstatus,
5808
      "sstatus": dev_sstatus,
5809
      "children": dev_children,
5810
      "mode": dev.mode,
5811
      }
5812

    
5813
    return data
5814

    
5815
  def Exec(self, feedback_fn):
5816
    """Gather and return data"""
5817
    result = {}
5818

    
5819
    cluster = self.cfg.GetClusterInfo()
5820

    
5821
    for instance in self.wanted_instances:
5822
      if not self.op.static:
5823
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5824
                                                  instance.name,
5825
                                                  instance.hypervisor)
5826
        msg = remote_info.RemoteFailMsg()
5827
        if msg:
5828
          raise errors.OpExecError("Error checking node %s: %s" %
5829
                                   (instance.primary_node, msg))
5830
        remote_info = remote_info.payload
5831
        if remote_info and "state" in remote_info:
5832
          remote_state = "up"
5833
        else:
5834
          remote_state = "down"
5835
      else:
5836
        remote_state = None
5837
      if instance.admin_up:
5838
        config_state = "up"
5839
      else:
5840
        config_state = "down"
5841

    
5842
      disks = [self._ComputeDiskStatus(instance, None, device)
5843
               for device in instance.disks]
5844

    
5845
      idict = {
5846
        "name": instance.name,
5847
        "config_state": config_state,
5848
        "run_state": remote_state,
5849
        "pnode": instance.primary_node,
5850
        "snodes": instance.secondary_nodes,
5851
        "os": instance.os,
5852
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
5853
        "disks": disks,
5854
        "hypervisor": instance.hypervisor,
5855
        "network_port": instance.network_port,
5856
        "hv_instance": instance.hvparams,
5857
        "hv_actual": cluster.FillHV(instance),
5858
        "be_instance": instance.beparams,
5859
        "be_actual": cluster.FillBE(instance),
5860
        }
5861

    
5862
      result[instance.name] = idict
5863

    
5864
    return result
5865

    
5866

    
5867
class LUSetInstanceParams(LogicalUnit):
5868
  """Modifies an instances's parameters.
5869

5870
  """
5871
  HPATH = "instance-modify"
5872
  HTYPE = constants.HTYPE_INSTANCE
5873
  _OP_REQP = ["instance_name"]
5874
  REQ_BGL = False
5875

    
5876
  def CheckArguments(self):
5877
    if not hasattr(self.op, 'nics'):
5878
      self.op.nics = []
5879
    if not hasattr(self.op, 'disks'):
5880
      self.op.disks = []
5881
    if not hasattr(self.op, 'beparams'):
5882
      self.op.beparams = {}
5883
    if not hasattr(self.op, 'hvparams'):
5884
      self.op.hvparams = {}
5885
    self.op.force = getattr(self.op, "force", False)
5886
    if not (self.op.nics or self.op.disks or
5887
            self.op.hvparams or self.op.beparams):
5888
      raise errors.OpPrereqError("No changes submitted")
5889

    
5890
    # Disk validation
5891
    disk_addremove = 0
5892
    for disk_op, disk_dict in self.op.disks:
5893
      if disk_op == constants.DDM_REMOVE:
5894
        disk_addremove += 1
5895
        continue
5896
      elif disk_op == constants.DDM_ADD:
5897
        disk_addremove += 1
5898
      else:
5899
        if not isinstance(disk_op, int):
5900
          raise errors.OpPrereqError("Invalid disk index")
5901
      if disk_op == constants.DDM_ADD:
5902
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5903
        if mode not in constants.DISK_ACCESS_SET:
5904
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5905
        size = disk_dict.get('size', None)
5906
        if size is None:
5907
          raise errors.OpPrereqError("Required disk parameter size missing")
5908
        try:
5909
          size = int(size)
5910
        except ValueError, err:
5911
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5912
                                     str(err))
5913
        disk_dict['size'] = size
5914
      else:
5915
        # modification of disk
5916
        if 'size' in disk_dict:
5917
          raise errors.OpPrereqError("Disk size change not possible, use"
5918
                                     " grow-disk")
5919

    
5920
    if disk_addremove > 1:
5921
      raise errors.OpPrereqError("Only one disk add or remove operation"
5922
                                 " supported at a time")
5923

    
5924
    # NIC validation
5925
    nic_addremove = 0
5926
    for nic_op, nic_dict in self.op.nics:
5927
      if nic_op == constants.DDM_REMOVE:
5928
        nic_addremove += 1
5929
        continue
5930
      elif nic_op == constants.DDM_ADD:
5931
        nic_addremove += 1
5932
      else:
5933
        if not isinstance(nic_op, int):
5934
          raise errors.OpPrereqError("Invalid nic index")
5935

    
5936
      # nic_dict should be a dict
5937
      nic_ip = nic_dict.get('ip', None)
5938
      if nic_ip is not None:
5939
        if nic_ip.lower() == constants.VALUE_NONE:
5940
          nic_dict['ip'] = None
5941
        else:
5942
          if not utils.IsValidIP(nic_ip):
5943
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5944

    
5945
      nic_bridge = nic_dict.get('bridge', None)
5946
      nic_link = nic_dict.get('link', None)
5947
      if nic_bridge and nic_link:
5948
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
5949
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
5950
        nic_dict['bridge'] = None
5951
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
5952
        nic_dict['link'] = None
5953

    
5954
      if nic_op == constants.DDM_ADD:
5955
        nic_mac = nic_dict.get('mac', None)
5956
        if nic_mac is None:
5957
          nic_dict['mac'] = constants.VALUE_AUTO
5958

    
5959
      if 'mac' in nic_dict:
5960
        nic_mac = nic_dict['mac']
5961
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5962
          if not utils.IsValidMac(nic_mac):
5963
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5964
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
5965
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
5966
                                     " modifying an existing nic")
5967

    
5968
    if nic_addremove > 1:
5969
      raise errors.OpPrereqError("Only one NIC add or remove operation"
5970
                                 " supported at a time")
5971

    
5972
  def ExpandNames(self):
5973
    self._ExpandAndLockInstance()
5974
    self.needed_locks[locking.LEVEL_NODE] = []
5975
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5976

    
5977
  def DeclareLocks(self, level):
5978
    if level == locking.LEVEL_NODE:
5979
      self._LockInstancesNodes()
5980

    
5981
  def BuildHooksEnv(self):
5982
    """Build hooks env.
5983

5984
    This runs on the master, primary and secondaries.
5985

5986
    """
5987
    args = dict()
5988
    if constants.BE_MEMORY in self.be_new:
5989
      args['memory'] = self.be_new[constants.BE_MEMORY]
5990
    if constants.BE_VCPUS in self.be_new:
5991
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
5992
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
5993
    # information at all.
5994
    if self.op.nics:
5995
      args['nics'] = []
5996
      nic_override = dict(self.op.nics)
5997
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
5998
      for idx, nic in enumerate(self.instance.nics):
5999
        if idx in nic_override:
6000
          this_nic_override = nic_override[idx]
6001
        else:
6002
          this_nic_override = {}
6003
        if 'ip' in this_nic_override:
6004
          ip = this_nic_override['ip']
6005
        else:
6006
          ip = nic.ip
6007
        if 'mac' in this_nic_override:
6008
          mac = this_nic_override['mac']
6009
        else:
6010
          mac = nic.mac
6011
        if idx in self.nic_pnew:
6012
          nicparams = self.nic_pnew[idx]
6013
        else:
6014
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6015
        mode = nicparams[constants.NIC_MODE]
6016
        link = nicparams[constants.NIC_LINK]
6017
        args['nics'].append((ip, mac, mode, link))
6018
      if constants.DDM_ADD in nic_override:
6019
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6020
        mac = nic_override[constants.DDM_ADD]['mac']
6021
        nicparams = self.nic_pnew[constants.DDM_ADD]
6022
        mode = nicparams[constants.NIC_MODE]
6023
        link = nicparams[constants.NIC_LINK]
6024
        args['nics'].append((ip, mac, mode, link))
6025
      elif constants.DDM_REMOVE in nic_override:
6026
        del args['nics'][-1]
6027

    
6028
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6029
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6030
    return env, nl, nl
6031

    
6032
  def _GetUpdatedParams(self, old_params, update_dict,
6033
                        default_values, parameter_types):
6034
    """Return the new params dict for the given params.
6035

6036
    @type old_params: dict
6037
    @type old_params: old parameters
6038
    @type update_dict: dict
6039
    @type update_dict: dict containing new parameter values,
6040
                       or constants.VALUE_DEFAULT to reset the
6041
                       parameter to its default value
6042
    @type default_values: dict
6043
    @param default_values: default values for the filled parameters
6044
    @type parameter_types: dict
6045
    @param parameter_types: dict mapping target dict keys to types
6046
                            in constants.ENFORCEABLE_TYPES
6047
    @rtype: (dict, dict)
6048
    @return: (new_parameters, filled_parameters)
6049

6050
    """
6051
    params_copy = copy.deepcopy(old_params)
6052
    for key, val in update_dict.iteritems():
6053
      if val == constants.VALUE_DEFAULT:
6054
        try:
6055
          del params_copy[key]
6056
        except KeyError:
6057
          pass
6058
      else:
6059
        params_copy[key] = val
6060
    utils.ForceDictType(params_copy, parameter_types)
6061
    params_filled = objects.FillDict(default_values, params_copy)
6062
    return (params_copy, params_filled)
6063

    
6064
  def CheckPrereq(self):
6065
    """Check prerequisites.
6066

6067
    This only checks the instance list against the existing names.
6068

6069
    """
6070
    force = self.force = self.op.force
6071

    
6072
    # checking the new params on the primary/secondary nodes
6073

    
6074
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6075
    cluster = self.cluster = self.cfg.GetClusterInfo()
6076
    assert self.instance is not None, \
6077
      "Cannot retrieve locked instance %s" % self.op.instance_name
6078
    pnode = instance.primary_node
6079
    nodelist = list(instance.all_nodes)
6080

    
6081
    # hvparams processing
6082
    if self.op.hvparams:
6083
      i_hvdict, hv_new = self._GetUpdatedParams(
6084
                             instance.hvparams, self.op.hvparams,
6085
                             cluster.hvparams[instance.hypervisor],
6086
                             constants.HVS_PARAMETER_TYPES)
6087
      # local check
6088
      hypervisor.GetHypervisor(
6089
        instance.hypervisor).CheckParameterSyntax(hv_new)
6090
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6091
      self.hv_new = hv_new # the new actual values
6092
      self.hv_inst = i_hvdict # the new dict (without defaults)
6093
    else:
6094
      self.hv_new = self.hv_inst = {}
6095

    
6096
    # beparams processing
6097
    if self.op.beparams:
6098
      i_bedict, be_new = self._GetUpdatedParams(
6099
                             instance.beparams, self.op.beparams,
6100
                             cluster.beparams[constants.PP_DEFAULT],
6101
                             constants.BES_PARAMETER_TYPES)
6102
      self.be_new = be_new # the new actual values
6103
      self.be_inst = i_bedict # the new dict (without defaults)
6104
    else:
6105
      self.be_new = self.be_inst = {}
6106

    
6107
    self.warn = []
6108

    
6109
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6110
      mem_check_list = [pnode]
6111
      if be_new[constants.BE_AUTO_BALANCE]:
6112
        # either we changed auto_balance to yes or it was from before
6113
        mem_check_list.extend(instance.secondary_nodes)
6114
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6115
                                                  instance.hypervisor)
6116
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6117
                                         instance.hypervisor)
6118
      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
6119
        # Assume the primary node is unreachable and go ahead
6120
        self.warn.append("Can't get info from primary node %s" % pnode)
6121
      elif instance_info.RemoteFailMsg():
6122
        self.warn.append("Can't get instance runtime information: %s" %
6123
                        instance_info.RemoteFailMsg())
6124
      else:
6125
        if instance_info.payload:
6126
          current_mem = int(instance_info.payload['memory'])
6127
        else:
6128
          # Assume instance not running
6129
          # (there is a slight race condition here, but it's not very probable,
6130
          # and we have no other way to check)
6131
          current_mem = 0
6132
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6133
                    nodeinfo[pnode].data['memory_free'])
6134
        if miss_mem > 0:
6135
          raise errors.OpPrereqError("This change will prevent the instance"
6136
                                     " from starting, due to %d MB of memory"
6137
                                     " missing on its primary node" % miss_mem)
6138

    
6139
      if be_new[constants.BE_AUTO_BALANCE]:
6140
        for node, nres in nodeinfo.iteritems():
6141
          if node not in instance.secondary_nodes:
6142
            continue
6143
          if nres.failed or not isinstance(nres.data, dict):
6144
            self.warn.append("Can't get info from secondary node %s" % node)
6145
          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
6146
            self.warn.append("Not enough memory to failover instance to"
6147
                             " secondary node %s" % node)
6148

    
6149
    # NIC processing
6150
    self.nic_pnew = {}
6151
    self.nic_pinst = {}
6152
    for nic_op, nic_dict in self.op.nics:
6153
      if nic_op == constants.DDM_REMOVE:
6154
        if not instance.nics:
6155
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6156
        continue
6157
      if nic_op != constants.DDM_ADD:
6158
        # an existing nic
6159
        if nic_op < 0 or nic_op >= len(instance.nics):
6160
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6161
                                     " are 0 to %d" %
6162
                                     (nic_op, len(instance.nics)))
6163
        old_nic_params = instance.nics[nic_op].nicparams
6164
        old_nic_ip = instance.nics[nic_op].ip
6165
      else:
6166
        old_nic_params = {}
6167
        old_nic_ip = None
6168

    
6169
      update_params_dict = dict([(key, nic_dict[key])
6170
                                 for key in constants.NICS_PARAMETERS
6171
                                 if key in nic_dict])
6172

    
6173
      if 'bridge' in nic_dict:
6174
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6175

    
6176
      new_nic_params, new_filled_nic_params = \
6177
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6178
                                 cluster.nicparams[constants.PP_DEFAULT],
6179
                                 constants.NICS_PARAMETER_TYPES)
6180
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6181
      self.nic_pinst[nic_op] = new_nic_params
6182
      self.nic_pnew[nic_op] = new_filled_nic_params
6183
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6184

    
6185
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6186
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6187
        result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
6188
        msg = result.RemoteFailMsg()
6189
        if msg:
6190
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6191
          if self.force:
6192
            self.warn.append(msg)
6193
          else:
6194
            raise errors.OpPrereqError(msg)
6195
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6196
        if 'ip' in nic_dict:
6197
          nic_ip = nic_dict['ip']
6198
        else:
6199
          nic_ip = old_nic_ip
6200
        if nic_ip is None:
6201
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6202
                                     ' on a routed nic')
6203
      if 'mac' in nic_dict:
6204
        nic_mac = nic_dict['mac']
6205
        if nic_mac is None:
6206
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6207
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6208
          # otherwise generate the mac
6209
          nic_dict['mac'] = self.cfg.GenerateMAC()
6210
        else:
6211
          # or validate/reserve the current one
6212
          if self.cfg.IsMacInUse(nic_mac):
6213
            raise errors.OpPrereqError("MAC address %s already in use"
6214
                                       " in cluster" % nic_mac)
6215

    
6216
    # DISK processing
6217
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6218
      raise errors.OpPrereqError("Disk operations not supported for"
6219
                                 " diskless instances")
6220
    for disk_op, disk_dict in self.op.disks:
6221
      if disk_op == constants.DDM_REMOVE:
6222
        if len(instance.disks) == 1:
6223
          raise errors.OpPrereqError("Cannot remove the last disk of"
6224
                                     " an instance")
6225
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6226
        ins_l = ins_l[pnode]
6227
        msg = ins_l.RemoteFailMsg()
6228
        if msg:
6229
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6230
                                     (pnode, msg))
6231
        if instance.name in ins_l.payload:
6232
          raise errors.OpPrereqError("Instance is running, can't remove"
6233
                                     " disks.")
6234

    
6235
      if (disk_op == constants.DDM_ADD and
6236
          len(instance.nics) >= constants.MAX_DISKS):
6237
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6238
                                   " add more" % constants.MAX_DISKS)
6239
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6240
        # an existing disk
6241
        if disk_op < 0 or disk_op >= len(instance.disks):
6242
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6243
                                     " are 0 to %d" %
6244
                                     (disk_op, len(instance.disks)))
6245

    
6246
    return
6247

    
6248
  def Exec(self, feedback_fn):
6249
    """Modifies an instance.
6250

6251
    All parameters take effect only at the next restart of the instance.
6252

6253
    """
6254
    # Process here the warnings from CheckPrereq, as we don't have a
6255
    # feedback_fn there.
6256
    for warn in self.warn:
6257
      feedback_fn("WARNING: %s" % warn)
6258

    
6259
    result = []
6260
    instance = self.instance
6261
    cluster = self.cluster
6262
    # disk changes
6263
    for disk_op, disk_dict in self.op.disks:
6264
      if disk_op == constants.DDM_REMOVE:
6265
        # remove the last disk
6266
        device = instance.disks.pop()
6267
        device_idx = len(instance.disks)
6268
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6269
          self.cfg.SetDiskID(disk, node)
6270
          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
6271
          if msg:
6272
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6273
                            " continuing anyway", device_idx, node, msg)
6274
        result.append(("disk/%d" % device_idx, "remove"))
6275
      elif disk_op == constants.DDM_ADD:
6276
        # add a new disk
6277
        if instance.disk_template == constants.DT_FILE:
6278
          file_driver, file_path = instance.disks[0].logical_id
6279
          file_path = os.path.dirname(file_path)
6280
        else:
6281
          file_driver = file_path = None
6282
        disk_idx_base = len(instance.disks)
6283
        new_disk = _GenerateDiskTemplate(self,
6284
                                         instance.disk_template,
6285
                                         instance.name, instance.primary_node,
6286
                                         instance.secondary_nodes,
6287
                                         [disk_dict],
6288
                                         file_path,
6289
                                         file_driver,
6290
                                         disk_idx_base)[0]
6291
        instance.disks.append(new_disk)
6292
        info = _GetInstanceInfoText(instance)
6293

    
6294
        logging.info("Creating volume %s for instance %s",
6295
                     new_disk.iv_name, instance.name)
6296
        # Note: this needs to be kept in sync with _CreateDisks
6297
        #HARDCODE
6298
        for node in instance.all_nodes:
6299
          f_create = node == instance.primary_node
6300
          try:
6301
            _CreateBlockDev(self, node, instance, new_disk,
6302
                            f_create, info, f_create)
6303
          except errors.OpExecError, err:
6304
            self.LogWarning("Failed to create volume %s (%s) on"
6305
                            " node %s: %s",
6306
                            new_disk.iv_name, new_disk, node, err)
6307
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6308
                       (new_disk.size, new_disk.mode)))
6309
      else:
6310
        # change a given disk
6311
        instance.disks[disk_op].mode = disk_dict['mode']
6312
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6313
    # NIC changes
6314
    for nic_op, nic_dict in self.op.nics:
6315
      if nic_op == constants.DDM_REMOVE:
6316
        # remove the last nic
6317
        del instance.nics[-1]
6318
        result.append(("nic.%d" % len(instance.nics), "remove"))
6319
      elif nic_op == constants.DDM_ADD:
6320
        # mac and bridge should be set, by now
6321
        mac = nic_dict['mac']
6322
        ip = nic_dict.get('ip', None)
6323
        nicparams = self.nic_pinst[constants.DDM_ADD]
6324
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6325
        instance.nics.append(new_nic)
6326
        result.append(("nic.%d" % (len(instance.nics) - 1),
6327
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6328
                       (new_nic.mac, new_nic.ip,
6329
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6330
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6331
                       )))
6332
      else:
6333
        for key in 'mac', 'ip':
6334
          if key in nic_dict:
6335
            setattr(instance.nics[nic_op], key, nic_dict[key])
6336
        if nic_op in self.nic_pnew:
6337
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6338
        for key, val in nic_dict.iteritems():
6339
          result.append(("nic.%s/%d" % (key, nic_op), val))
6340

    
6341
    # hvparams changes
6342
    if self.op.hvparams:
6343
      instance.hvparams = self.hv_inst
6344
      for key, val in self.op.hvparams.iteritems():
6345
        result.append(("hv/%s" % key, val))
6346

    
6347
    # beparams changes
6348
    if self.op.beparams:
6349
      instance.beparams = self.be_inst
6350
      for key, val in self.op.beparams.iteritems():
6351
        result.append(("be/%s" % key, val))
6352

    
6353
    self.cfg.Update(instance)
6354

    
6355
    return result
6356

    
6357

    
6358
class LUQueryExports(NoHooksLU):
6359
  """Query the exports list
6360

6361
  """
6362
  _OP_REQP = ['nodes']
6363
  REQ_BGL = False
6364

    
6365
  def ExpandNames(self):
6366
    self.needed_locks = {}
6367
    self.share_locks[locking.LEVEL_NODE] = 1
6368
    if not self.op.nodes:
6369
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6370
    else:
6371
      self.needed_locks[locking.LEVEL_NODE] = \
6372
        _GetWantedNodes(self, self.op.nodes)
6373

    
6374
  def CheckPrereq(self):
6375
    """Check prerequisites.
6376

6377
    """
6378
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6379

    
6380
  def Exec(self, feedback_fn):
6381
    """Compute the list of all the exported system images.
6382

6383
    @rtype: dict
6384
    @return: a dictionary with the structure node->(export-list)
6385
        where export-list is a list of the instances exported on
6386
        that node.
6387

6388
    """
6389
    rpcresult = self.rpc.call_export_list(self.nodes)
6390
    result = {}
6391
    for node in rpcresult:
6392
      if rpcresult[node].RemoteFailMsg():
6393
        result[node] = False
6394
      else:
6395
        result[node] = rpcresult[node].payload
6396

    
6397
    return result
6398

    
6399

    
6400
class LUExportInstance(LogicalUnit):
6401
  """Export an instance to an image in the cluster.
6402

6403
  """
6404
  HPATH = "instance-export"
6405
  HTYPE = constants.HTYPE_INSTANCE
6406
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6407
  REQ_BGL = False
6408

    
6409
  def ExpandNames(self):
6410
    self._ExpandAndLockInstance()
6411
    # FIXME: lock only instance primary and destination node
6412
    #
6413
    # Sad but true, for now we have do lock all nodes, as we don't know where
6414
    # the previous export might be, and and in this LU we search for it and
6415
    # remove it from its current node. In the future we could fix this by:
6416
    #  - making a tasklet to search (share-lock all), then create the new one,
6417
    #    then one to remove, after
6418
    #  - removing the removal operation altoghether
6419
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6420

    
6421
  def DeclareLocks(self, level):
6422
    """Last minute lock declaration."""
6423
    # All nodes are locked anyway, so nothing to do here.
6424

    
6425
  def BuildHooksEnv(self):
6426
    """Build hooks env.
6427

6428
    This will run on the master, primary node and target node.
6429

6430
    """
6431
    env = {
6432
      "EXPORT_NODE": self.op.target_node,
6433
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6434
      }
6435
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6436
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6437
          self.op.target_node]
6438
    return env, nl, nl
6439

    
6440
  def CheckPrereq(self):
6441
    """Check prerequisites.
6442

6443
    This checks that the instance and node names are valid.
6444

6445
    """
6446
    instance_name = self.op.instance_name
6447
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6448
    assert self.instance is not None, \
6449
          "Cannot retrieve locked instance %s" % self.op.instance_name
6450
    _CheckNodeOnline(self, self.instance.primary_node)
6451

    
6452
    self.dst_node = self.cfg.GetNodeInfo(
6453
      self.cfg.ExpandNodeName(self.op.target_node))
6454

    
6455
    if self.dst_node is None:
6456
      # This is wrong node name, not a non-locked node
6457
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6458
    _CheckNodeOnline(self, self.dst_node.name)
6459
    _CheckNodeNotDrained(self, self.dst_node.name)
6460

    
6461
    # instance disk type verification
6462
    for disk in self.instance.disks:
6463
      if disk.dev_type == constants.LD_FILE:
6464
        raise errors.OpPrereqError("Export not supported for instances with"
6465
                                   " file-based disks")
6466

    
6467
  def Exec(self, feedback_fn):
6468
    """Export an instance to an image in the cluster.
6469

6470
    """
6471
    instance = self.instance
6472
    dst_node = self.dst_node
6473
    src_node = instance.primary_node
6474
    if self.op.shutdown:
6475
      # shutdown the instance, but not the disks
6476
      result = self.rpc.call_instance_shutdown(src_node, instance)
6477
      msg = result.RemoteFailMsg()
6478
      if msg:
6479
        raise errors.OpExecError("Could not shutdown instance %s on"
6480
                                 " node %s: %s" %
6481
                                 (instance.name, src_node, msg))
6482

    
6483
    vgname = self.cfg.GetVGName()
6484

    
6485
    snap_disks = []
6486

    
6487
    # set the disks ID correctly since call_instance_start needs the
6488
    # correct drbd minor to create the symlinks
6489
    for disk in instance.disks:
6490
      self.cfg.SetDiskID(disk, src_node)
6491

    
6492
    try:
6493
      for disk in instance.disks:
6494
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6495
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6496
        msg = result.RemoteFailMsg()
6497
        if msg:
6498
          self.LogWarning("Could not snapshot block device %s on node %s: %s",
6499
                          disk.logical_id[1], src_node, msg)
6500
          snap_disks.append(False)
6501
        else:
6502
          disk_id = (vgname, result.payload)
6503
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6504
                                 logical_id=disk_id, physical_id=disk_id,
6505
                                 iv_name=disk.iv_name)
6506
          snap_disks.append(new_dev)
6507

    
6508
    finally:
6509
      if self.op.shutdown and instance.admin_up:
6510
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6511
        msg = result.RemoteFailMsg()
6512
        if msg:
6513
          _ShutdownInstanceDisks(self, instance)
6514
          raise errors.OpExecError("Could not start instance: %s" % msg)
6515

    
6516
    # TODO: check for size
6517

    
6518
    cluster_name = self.cfg.GetClusterName()
6519
    for idx, dev in enumerate(snap_disks):
6520
      if dev:
6521
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6522
                                               instance, cluster_name, idx)
6523
        msg = result.RemoteFailMsg()
6524
        if msg:
6525
          self.LogWarning("Could not export block device %s from node %s to"
6526
                          " node %s: %s", dev.logical_id[1], src_node,
6527
                          dst_node.name, msg)
6528
        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
6529
        if msg:
6530
          self.LogWarning("Could not remove snapshot block device %s from node"
6531
                          " %s: %s", dev.logical_id[1], src_node, msg)
6532

    
6533
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6534
    msg = result.RemoteFailMsg()
6535
    if msg:
6536
      self.LogWarning("Could not finalize export for instance %s"
6537
                      " on node %s: %s", instance.name, dst_node.name, msg)
6538

    
6539
    nodelist = self.cfg.GetNodeList()
6540
    nodelist.remove(dst_node.name)
6541

    
6542
    # on one-node clusters nodelist will be empty after the removal
6543
    # if we proceed the backup would be removed because OpQueryExports
6544
    # substitutes an empty list with the full cluster node list.
6545
    iname = instance.name
6546
    if nodelist:
6547
      exportlist = self.rpc.call_export_list(nodelist)
6548
      for node in exportlist:
6549
        if exportlist[node].RemoteFailMsg():
6550
          continue
6551
        if iname in exportlist[node].payload:
6552
          msg = self.rpc.call_export_remove(node, iname).RemoteFailMsg()
6553
          if msg:
6554
            self.LogWarning("Could not remove older export for instance %s"
6555
                            " on node %s: %s", iname, node, msg)
6556

    
6557

    
6558
class LURemoveExport(NoHooksLU):
6559
  """Remove exports related to the named instance.
6560

6561
  """
6562
  _OP_REQP = ["instance_name"]
6563
  REQ_BGL = False
6564

    
6565
  def ExpandNames(self):
6566
    self.needed_locks = {}
6567
    # We need all nodes to be locked in order for RemoveExport to work, but we
6568
    # don't need to lock the instance itself, as nothing will happen to it (and
6569
    # we can remove exports also for a removed instance)
6570
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6571

    
6572
  def CheckPrereq(self):
6573
    """Check prerequisites.
6574
    """
6575
    pass
6576

    
6577
  def Exec(self, feedback_fn):
6578
    """Remove any export.
6579

6580
    """
6581
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6582
    # If the instance was not found we'll try with the name that was passed in.
6583
    # This will only work if it was an FQDN, though.
6584
    fqdn_warn = False
6585
    if not instance_name:
6586
      fqdn_warn = True
6587
      instance_name = self.op.instance_name
6588

    
6589
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6590
    exportlist = self.rpc.call_export_list(locked_nodes)
6591
    found = False
6592
    for node in exportlist:
6593
      msg = exportlist[node].RemoteFailMsg()
6594
      if msg:
6595
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
6596
        continue
6597
      if instance_name in exportlist[node].payload:
6598
        found = True
6599
        result = self.rpc.call_export_remove(node, instance_name)
6600
        msg = result.RemoteFailMsg()
6601
        if msg:
6602
          logging.error("Could not remove export for instance %s"
6603
                        " on node %s: %s", instance_name, node, msg)
6604

    
6605
    if fqdn_warn and not found:
6606
      feedback_fn("Export not found. If trying to remove an export belonging"
6607
                  " to a deleted instance please use its Fully Qualified"
6608
                  " Domain Name.")
6609

    
6610

    
6611
class TagsLU(NoHooksLU):
6612
  """Generic tags LU.
6613

6614
  This is an abstract class which is the parent of all the other tags LUs.
6615

6616
  """
6617

    
6618
  def ExpandNames(self):
6619
    self.needed_locks = {}
6620
    if self.op.kind == constants.TAG_NODE:
6621
      name = self.cfg.ExpandNodeName(self.op.name)
6622
      if name is None:
6623
        raise errors.OpPrereqError("Invalid node name (%s)" %
6624
                                   (self.op.name,))
6625
      self.op.name = name
6626
      self.needed_locks[locking.LEVEL_NODE] = name
6627
    elif self.op.kind == constants.TAG_INSTANCE:
6628
      name = self.cfg.ExpandInstanceName(self.op.name)
6629
      if name is None:
6630
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6631
                                   (self.op.name,))
6632
      self.op.name = name
6633
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6634

    
6635
  def CheckPrereq(self):
6636
    """Check prerequisites.
6637

6638
    """
6639
    if self.op.kind == constants.TAG_CLUSTER:
6640
      self.target = self.cfg.GetClusterInfo()
6641
    elif self.op.kind == constants.TAG_NODE:
6642
      self.target = self.cfg.GetNodeInfo(self.op.name)
6643
    elif self.op.kind == constants.TAG_INSTANCE:
6644
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6645
    else:
6646
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6647
                                 str(self.op.kind))
6648

    
6649

    
6650
class LUGetTags(TagsLU):
6651
  """Returns the tags of a given object.
6652

6653
  """
6654
  _OP_REQP = ["kind", "name"]
6655
  REQ_BGL = False
6656

    
6657
  def Exec(self, feedback_fn):
6658
    """Returns the tag list.
6659

6660
    """
6661
    return list(self.target.GetTags())
6662

    
6663

    
6664
class LUSearchTags(NoHooksLU):
6665
  """Searches the tags for a given pattern.
6666

6667
  """
6668
  _OP_REQP = ["pattern"]
6669
  REQ_BGL = False
6670

    
6671
  def ExpandNames(self):
6672
    self.needed_locks = {}
6673

    
6674
  def CheckPrereq(self):
6675
    """Check prerequisites.
6676

6677
    This checks the pattern passed for validity by compiling it.
6678

6679
    """
6680
    try:
6681
      self.re = re.compile(self.op.pattern)
6682
    except re.error, err:
6683
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6684
                                 (self.op.pattern, err))
6685

    
6686
  def Exec(self, feedback_fn):
6687
    """Returns the tag list.
6688

6689
    """
6690
    cfg = self.cfg
6691
    tgts = [("/cluster", cfg.GetClusterInfo())]
6692
    ilist = cfg.GetAllInstancesInfo().values()
6693
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6694
    nlist = cfg.GetAllNodesInfo().values()
6695
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6696
    results = []
6697
    for path, target in tgts:
6698
      for tag in target.GetTags():
6699
        if self.re.search(tag):
6700
          results.append((path, tag))
6701
    return results
6702

    
6703

    
6704
class LUAddTags(TagsLU):
6705
  """Sets a tag on a given object.
6706

6707
  """
6708
  _OP_REQP = ["kind", "name", "tags"]
6709
  REQ_BGL = False
6710

    
6711
  def CheckPrereq(self):
6712
    """Check prerequisites.
6713

6714
    This checks the type and length of the tag name and value.
6715

6716
    """
6717
    TagsLU.CheckPrereq(self)
6718
    for tag in self.op.tags:
6719
      objects.TaggableObject.ValidateTag(tag)
6720

    
6721
  def Exec(self, feedback_fn):
6722
    """Sets the tag.
6723

6724
    """
6725
    try:
6726
      for tag in self.op.tags:
6727
        self.target.AddTag(tag)
6728
    except errors.TagError, err:
6729
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6730
    try:
6731
      self.cfg.Update(self.target)
6732
    except errors.ConfigurationError:
6733
      raise errors.OpRetryError("There has been a modification to the"
6734
                                " config file and the operation has been"
6735
                                " aborted. Please retry.")
6736

    
6737

    
6738
class LUDelTags(TagsLU):
6739
  """Delete a list of tags from a given object.
6740

6741
  """
6742
  _OP_REQP = ["kind", "name", "tags"]
6743
  REQ_BGL = False
6744

    
6745
  def CheckPrereq(self):
6746
    """Check prerequisites.
6747

6748
    This checks that we have the given tag.
6749

6750
    """
6751
    TagsLU.CheckPrereq(self)
6752
    for tag in self.op.tags:
6753
      objects.TaggableObject.ValidateTag(tag)
6754
    del_tags = frozenset(self.op.tags)
6755
    cur_tags = self.target.GetTags()
6756
    if not del_tags <= cur_tags:
6757
      diff_tags = del_tags - cur_tags
6758
      diff_names = ["'%s'" % tag for tag in diff_tags]
6759
      diff_names.sort()
6760
      raise errors.OpPrereqError("Tag(s) %s not found" %
6761
                                 (",".join(diff_names)))
6762

    
6763
  def Exec(self, feedback_fn):
6764
    """Remove the tag from the object.
6765

6766
    """
6767
    for tag in self.op.tags:
6768
      self.target.RemoveTag(tag)
6769
    try:
6770
      self.cfg.Update(self.target)
6771
    except errors.ConfigurationError:
6772
      raise errors.OpRetryError("There has been a modification to the"
6773
                                " config file and the operation has been"
6774
                                " aborted. Please retry.")
6775

    
6776

    
6777
class LUTestDelay(NoHooksLU):
6778
  """Sleep for a specified amount of time.
6779

6780
  This LU sleeps on the master and/or nodes for a specified amount of
6781
  time.
6782

6783
  """
6784
  _OP_REQP = ["duration", "on_master", "on_nodes"]
6785
  REQ_BGL = False
6786

    
6787
  def ExpandNames(self):
6788
    """Expand names and set required locks.
6789

6790
    This expands the node list, if any.
6791

6792
    """
6793
    self.needed_locks = {}
6794
    if self.op.on_nodes:
6795
      # _GetWantedNodes can be used here, but is not always appropriate to use
6796
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
6797
      # more information.
6798
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
6799
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
6800

    
6801
  def CheckPrereq(self):
6802
    """Check prerequisites.
6803

6804
    """
6805

    
6806
  def Exec(self, feedback_fn):
6807
    """Do the actual sleep.
6808

6809
    """
6810
    if self.op.on_master:
6811
      if not utils.TestDelay(self.op.duration):
6812
        raise errors.OpExecError("Error during master delay test")
6813
    if self.op.on_nodes:
6814
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
6815
      if not result:
6816
        raise errors.OpExecError("Complete failure from rpc call")
6817
      for node, node_result in result.items():
6818
        node_result.Raise()
6819
        if not node_result.data:
6820
          raise errors.OpExecError("Failure during rpc call to node %s,"
6821
                                   " result: %s" % (node, node_result.data))
6822

    
6823

    
6824
class IAllocator(object):
6825
  """IAllocator framework.
6826

6827
  An IAllocator instance has three sets of attributes:
6828
    - cfg that is needed to query the cluster
6829
    - input data (all members of the _KEYS class attribute are required)
6830
    - four buffer attributes (in|out_data|text), that represent the
6831
      input (to the external script) in text and data structure format,
6832
      and the output from it, again in two formats
6833
    - the result variables from the script (success, info, nodes) for
6834
      easy usage
6835

6836
  """
6837
  _ALLO_KEYS = [
6838
    "mem_size", "disks", "disk_template",
6839
    "os", "tags", "nics", "vcpus", "hypervisor",
6840
    ]
6841
  _RELO_KEYS = [
6842
    "relocate_from",
6843
    ]

  def __init__(self, lu, mode, name, **kwargs):
    self.lu = lu
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise()
        if not isinstance(nresult.data, dict):
          raise errors.OpExecError("Can't get data for node %s" % nname)
        msg = node_iinfo[nname].RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Can't get node instance info"
                                   " from node %s: %s" % (nname, msg))
        remote_info = nresult.data
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          try:
            remote_info[attr] = int(remote_info[attr])
          except ValueError, err:
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" % (nname, attr, err))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            # reserve the full configured memory for each primary instance,
            # even if it currently uses less than that (or is down)
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic (runtime) node data
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
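
    # Illustrative overall shape of self.in_data (the field names are those
    # built above; the concrete values are placeholders, not from the
    # original source):
    #   {"version": constants.IALLOCATOR_VERSION,
    #    "cluster_name": "cluster.example.com",
    #    "cluster_tags": [],
    #    "enabled_hypervisors": ["xen-pvm"],
    #    "nodes": {"node1.example.com": {"total_memory": 4096, ...}, ...},
    #    "instances": {"inst1.example.com": {"memory": 512, ...}, ...}}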

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request
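
    # Illustrative shape of the resulting "relocate" request (placeholder
    # values, not from the original source):
    #   {"type": "relocate", "name": "inst1.example.com",
    #    "disk_space_total": <computed by _ComputeDiskSize>,
    #    "required_nodes": 1,
    #    "relocate_from": ["node2.example.com"]}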
7074

    
7075
  def _BuildInputData(self):
7076
    """Build input data structures.
7077

7078
    """
7079
    self._ComputeClusterData()
7080

    
7081
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
7082
      self._AddNewInstance()
7083
    else:
7084
      self._AddRelocateInstance()
7085

    
7086
    self.in_text = serializer.Dump(self.in_data)
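    # in_text is the serialized form of in_data; Run() below hands exactly
    # this string to the external allocator script via the master node's RPC.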

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise()

    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result.data

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" % (fail, stdout + stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()
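
    # Hypothetical usage sketch (not in the original source): a caller that
    # has built an IAllocator instance `ial` and has an allocator script
    # installed under the (made-up) name "my-allocator" might do:
    #   ial.Run("my-allocator")
    #   if not ial.success:
    #     raise errors.OpPrereqError("Allocation failed: %s" % ial.info)
    #   target_nodes = ial.nodes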

  def _ValidateResult(self):
    """Process the allocator results.

    This will parse the allocator output and, if successful, save the
    result in self.out_data and in the individual result attributes
    (success, info, nodes).

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
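
    # A reply that passes the checks above therefore parses to a dict of the
    # following shape (illustrative placeholder values):
    #   {"success": True, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node3.example.com"]}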


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)
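
    # Illustrative opcode values that would satisfy the ALLOC-mode checks
    # above (placeholders, not from the original source):
    #   nics  = [{"mac": "auto", "ip": None, "bridge": "xen-br0"}]
    #   disks = [{"size": 1024, "mode": "w"}]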

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result