root / lib / cmdlib.py @ 6f68a739

#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import tempfile
import re
import platform
import logging
import copy
import random

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq
    - implement Exec
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True
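
  # Illustrative sketch only (not part of the original module): a minimal,
  # hypothetical subclass following the contract listed in the class
  # docstring above. The opcode name and its "message" parameter are made up
  # for the example; the real LUs are defined further down in this module.
  #
  #   class LUExampleNoop(LogicalUnit):
  #     HPATH = "example-noop"
  #     HTYPE = constants.HTYPE_CLUSTER
  #     _OP_REQP = ["message"]    # presence is checked by LogicalUnit.__init__
  #     REQ_BGL = False           # concurrent LU, so ExpandNames is mandatory
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}  # this example needs no locks at all
  #
  #     def CheckPrereq(self):
  #       pass                    # nothing to verify against the cluster
  #
  #     def BuildHooksEnv(self):
  #       return {"OP_TARGET": self.cfg.GetClusterName()}, [], []
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn(self.op.message)
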
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)
    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and to ensure the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note that additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to run hooks on, an empty list (and not None)
    should be returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

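  # Illustrative sketch only (not part of the original module): the kind of
  # value a hypothetical BuildHooksEnv implementation could return. The first
  # element is the hook environment (the hooks runner adds the GANETI_ prefix
  # to the keys), the second the nodes on which the pre-hook runs, the third
  # the nodes on which the post-hook runs.
  #
  #   def BuildHooksEnv(self):
  #     env = {
  #       "OP_TARGET": self.op.instance_name,
  #       "INSTANCE_NAME": self.op.instance_name,
  #       }
  #     nl = [self.cfg.GetMasterNode()]
  #     return env, nl, nl
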
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


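# Illustrative sketch only (not part of the original module): how a
# hypothetical instance-level LU could combine _ExpandAndLockInstance,
# recalculate_locks and _LockInstancesNodes, as described in the docstrings
# above.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     # node locks are computed later, once the instance lock is held
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()
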
class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpProgrammerError: if the nodes parameter is of the wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  return env

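# Illustrative sketch only (not part of the original module): for a
# hypothetical single-NIC, single-disk instance, _BuildInstanceHookEnv
# returns an environment along these lines (all names and values are
# made-up examples):
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_OS_TYPE": "debian-etch",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 128,
#     "INSTANCE_VCPUS": 1,
#     "INSTANCE_DISK_TEMPLATE": "drbd",
#     "INSTANCE_NIC0_IP": "", "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_NIC0_MODE": "bridged", "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0", "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024, "INSTANCE_DISK0_MODE": "rw",
#     "INSTANCE_DISK_COUNT": 1,
#   }
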
def _PreBuildNICHooksList(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _PreBuildNICHooksList(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpPrereqError("Error checking bridges on destination node"
                                 " '%s': %s" % (target_node, msg))


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signalled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise()
    if not result.data:
      raise errors.OpExecError("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        the form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
                        " '%s'" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to, should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

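  # Illustrative sketch only (not part of the original module): a worked
  # example of the N+1 check above, with made-up numbers. Suppose node C is
  # secondary for two auto-balanced instances whose primary is node A (2048
  # and 1024 MiB of memory) and for one instance whose primary is node B
  # (512 MiB). If node A fails, C must be able to start 2048 + 1024 = 3072
  # MiB worth of instances; if node B fails, only 512 MiB. With
  # mfree = 2500 MiB on C, the first case triggers the "not enough memory"
  # error above while the second does not.
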
  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].RemoteFailMsg()
      if msg:
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
        bad = True
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  This is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          if res.failed or res.data is False or not isinstance(res.data, list):
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution")
            lu_result = 1
            continue
          for script, hkr, output in res.data:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
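    # Illustrative sketch only (not part of the original module): a possible
    # return value for a hypothetical cluster where node2 could not be
    # queried, inst1 needs its disks activated, and inst2 has a logical
    # volume missing on node3 (all names are made up):
    #
    #   ({"node2.example.com": "error message"},
    #    ["inst1.example.com"],
    #    {"inst2.example.com": [("node3.example.com", "some-lv-name")]})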
    result = res_nodes, res_instances, res_missing = {}, [], {}
1299

    
1300
    vg_name = self.cfg.GetVGName()
1301
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1302
    instances = [self.cfg.GetInstanceInfo(name)
1303
                 for name in self.cfg.GetInstanceList()]
1304

    
1305
    nv_dict = {}
1306
    for inst in instances:
1307
      inst_lvs = {}
1308
      if (not inst.admin_up or
1309
          inst.disk_template not in constants.DTS_NET_MIRROR):
1310
        continue
1311
      inst.MapLVsByNode(inst_lvs)
1312
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1313
      for node, vol_list in inst_lvs.iteritems():
1314
        for vol in vol_list:
1315
          nv_dict[(node, vol)] = inst
1316

    
1317
    if not nv_dict:
1318
      return result
1319

    
1320
    node_lvs = self.rpc.call_volume_list(nodes, vg_name)
1321

    
1322
    to_act = set()
1323
    for node in nodes:
1324
      # node_volume
1325
      node_res = node_lvs[node]
1326
      if node_res.offline:
1327
        continue
1328
      msg = node_res.RemoteFailMsg()
1329
      if msg:
1330
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1331
        res_nodes[node] = msg
1332
        continue
1333

    
1334
      lvs = node_res.payload
1335
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1336
        inst = nv_dict.pop((node, lv_name), None)
1337
        if (not lv_online and inst is not None
1338
            and inst.name not in res_instances):
1339
          res_instances.append(inst.name)
1340

    
1341
    # any leftover items in nv_dict are missing LVs, let's arrange the
1342
    # data better
1343
    for key, inst in nv_dict.iteritems():
1344
      if inst.name not in res_missing:
1345
        res_missing[inst.name] = []
1346
      res_missing[inst.name].append(key)
1347

    
1348
    return result
1349

    
1350

    
1351
class LURenameCluster(LogicalUnit):
1352
  """Rename the cluster.
1353

1354
  """
1355
  HPATH = "cluster-rename"
1356
  HTYPE = constants.HTYPE_CLUSTER
1357
  _OP_REQP = ["name"]
1358

    
1359
  def BuildHooksEnv(self):
1360
    """Build hooks env.
1361

1362
    """
1363
    env = {
1364
      "OP_TARGET": self.cfg.GetClusterName(),
1365
      "NEW_NAME": self.op.name,
1366
      }
1367
    mn = self.cfg.GetMasterNode()
1368
    return env, [mn], [mn]
1369

    
1370
  def CheckPrereq(self):
1371
    """Verify that the passed name is a valid one.
1372

1373
    """
1374
    hostname = utils.HostInfo(self.op.name)
1375

    
1376
    new_name = hostname.name
1377
    self.ip = new_ip = hostname.ip
1378
    old_name = self.cfg.GetClusterName()
1379
    old_ip = self.cfg.GetMasterIP()
1380
    if new_name == old_name and new_ip == old_ip:
1381
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1382
                                 " cluster has changed")
1383
    if new_ip != old_ip:
1384
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1385
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1386
                                   " reachable on the network. Aborting." %
1387
                                   new_ip)
1388

    
1389
    self.op.name = new_name
1390

    
1391
  def Exec(self, feedback_fn):
1392
    """Rename the cluster.
1393

1394
    """
1395
    clustername = self.op.name
1396
    ip = self.ip
1397

    
1398
    # shutdown the master IP
1399
    master = self.cfg.GetMasterNode()
1400
    result = self.rpc.call_node_stop_master(master, False)
1401
    if result.failed or not result.data:
1402
      raise errors.OpExecError("Could not disable the master role")
1403

    
1404
    try:
1405
      cluster = self.cfg.GetClusterInfo()
1406
      cluster.cluster_name = clustername
1407
      cluster.master_ip = ip
1408
      self.cfg.Update(cluster)
1409

    
1410
      # update the known hosts file
1411
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1412
      node_list = self.cfg.GetNodeList()
1413
      try:
1414
        node_list.remove(master)
1415
      except ValueError:
1416
        pass
1417
      result = self.rpc.call_upload_file(node_list,
1418
                                         constants.SSH_KNOWN_HOSTS_FILE)
1419
      for to_node, to_result in result.iteritems():
1420
         msg = to_result.RemoteFailMsg()
1421
         if msg:
1422
           msg = ("Copy of file %s to node %s failed: %s" %
1423
                   (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1424
           self.proc.LogWarning(msg)
1425

    
1426
    finally:
1427
      result = self.rpc.call_node_start_master(master, False)
1428
      if result.failed or not result.data:
1429
        self.LogWarning("Could not re-enable the master role on"
1430
                        " the master, please restart manually.")
1431

    
1432

    
1433
def _RecursiveCheckIfLVMBased(disk):
1434
  """Check if the given disk or its children are lvm-based.
1435

1436
  @type disk: L{objects.Disk}
1437
  @param disk: the disk to check
1438
  @rtype: booleean
1439
  @return: boolean indicating whether a LD_LV dev_type was found or not
1440

1441
  """
1442
  if disk.children:
1443
    for chdisk in disk.children:
1444
      if _RecursiveCheckIfLVMBased(chdisk):
1445
        return True
1446
  return disk.dev_type == constants.LD_LV
1447

    
1448

    
1449
class LUSetClusterParams(LogicalUnit):
1450
  """Change the parameters of the cluster.
1451

1452
  """
1453
  HPATH = "cluster-modify"
1454
  HTYPE = constants.HTYPE_CLUSTER
1455
  _OP_REQP = []
1456
  REQ_BGL = False
1457

    
1458
  def CheckArguments(self):
1459
    """Check parameters
1460

1461
    """
1462
    if not hasattr(self.op, "candidate_pool_size"):
1463
      self.op.candidate_pool_size = None
1464
    if self.op.candidate_pool_size is not None:
1465
      try:
1466
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1467
      except (ValueError, TypeError), err:
1468
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1469
                                   str(err))
1470
      if self.op.candidate_pool_size < 1:
1471
        raise errors.OpPrereqError("At least one master candidate needed")
1472

    
1473
  def ExpandNames(self):
1474
    # FIXME: in the future maybe other cluster params won't require checking on
1475
    # all nodes to be modified.
1476
    self.needed_locks = {
1477
      locking.LEVEL_NODE: locking.ALL_SET,
1478
    }
1479
    self.share_locks[locking.LEVEL_NODE] = 1
1480

    
1481
  def BuildHooksEnv(self):
1482
    """Build hooks env.
1483

1484
    """
1485
    env = {
1486
      "OP_TARGET": self.cfg.GetClusterName(),
1487
      "NEW_VG_NAME": self.op.vg_name,
1488
      }
1489
    mn = self.cfg.GetMasterNode()
1490
    return env, [mn], [mn]
1491

    
1492
  def CheckPrereq(self):
1493
    """Check prerequisites.
1494

1495
    This checks that the given parameters don't conflict and
1496
    that the given volume group is valid.
1497

1498
    """
1499
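    # a vg_name that is set but empty means "disable LVM storage"; this is
    # only allowed if no instance still uses LVM-based disks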
    if self.op.vg_name is not None and not self.op.vg_name:
1500
      instances = self.cfg.GetAllInstancesInfo().values()
1501
      for inst in instances:
1502
        for disk in inst.disks:
1503
          if _RecursiveCheckIfLVMBased(disk):
1504
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1505
                                       " lvm-based instances exist")
1506

    
1507
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1508

    
1509
    # if vg_name is not None, check the given volume group on all nodes
1510
    if self.op.vg_name:
1511
      vglist = self.rpc.call_vg_list(node_list)
1512
      for node in node_list:
1513
        msg = vglist[node].RemoteFailMsg()
1514
        if msg:
1515
          # ignoring down node
1516
          self.LogWarning("Error while gathering data on node %s"
1517
                          " (ignoring node): %s", node, msg)
1518
          continue
1519
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1520
                                              self.op.vg_name,
1521
                                              constants.MIN_VG_SIZE)
1522
        if vgstatus:
1523
          raise errors.OpPrereqError("Error on node '%s': %s" %
1524
                                     (node, vgstatus))
1525

    
1526
    self.cluster = cluster = self.cfg.GetClusterInfo()
1527
    # validate params changes
1528
    if self.op.beparams:
1529
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1530
      self.new_beparams = objects.FillDict(
1531
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1532

    
1533
    if self.op.nicparams:
1534
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1535
      self.new_nicparams = objects.FillDict(
1536
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1537
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1538

    
1539
    # hypervisor list/parameters
1540
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1541
    if self.op.hvparams:
1542
      if not isinstance(self.op.hvparams, dict):
1543
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1544
      for hv_name, hv_dict in self.op.hvparams.items():
1545
        if hv_name not in self.new_hvparams:
1546
          self.new_hvparams[hv_name] = hv_dict
1547
        else:
1548
          self.new_hvparams[hv_name].update(hv_dict)
1549

    
1550
    if self.op.enabled_hypervisors is not None:
1551
      self.hv_list = self.op.enabled_hypervisors
1552
    else:
1553
      self.hv_list = cluster.enabled_hypervisors
1554

    
1555
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1556
      # either the enabled list has changed, or the parameters have, validate
1557
      for hv_name, hv_params in self.new_hvparams.items():
1558
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1559
            (self.op.enabled_hypervisors and
1560
             hv_name in self.op.enabled_hypervisors)):
1561
          # either this is a new hypervisor, or its parameters have changed
1562
          hv_class = hypervisor.GetHypervisor(hv_name)
1563
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1564
          hv_class.CheckParameterSyntax(hv_params)
1565
          _CheckHVParams(self, node_list, hv_name, hv_params)
1566

    
1567
  def Exec(self, feedback_fn):
1568
    """Change the parameters of the cluster.
1569

1570
    """
1571
    if self.op.vg_name is not None:
1572
      new_volume = self.op.vg_name
1573
      if not new_volume:
1574
        new_volume = None
1575
      if new_volume != self.cfg.GetVGName():
1576
        self.cfg.SetVGName(new_volume)
1577
      else:
1578
        feedback_fn("Cluster LVM configuration already in desired"
1579
                    " state, not changing")
1580
    if self.op.hvparams:
1581
      self.cluster.hvparams = self.new_hvparams
1582
    if self.op.enabled_hypervisors is not None:
1583
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1584
    if self.op.beparams:
1585
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1586
    if self.op.nicparams:
1587
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1588

    
1589
    if self.op.candidate_pool_size is not None:
1590
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1591

    
1592
    self.cfg.Update(self.cluster)
1593

    
1594
    # we want to update nodes after the cluster so that if any errors
1595
    # happen, we have recorded and saved the cluster info
1596
    if self.op.candidate_pool_size is not None:
1597
      _AdjustCandidatePool(self)
1598

    
1599

    
1600
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1601
  """Distribute additional files which are part of the cluster configuration.
1602

1603
  ConfigWriter takes care of distributing the config and ssconf files, but
1604
  there are more files which should be distributed to all nodes. This function
1605
  makes sure those are copied.
1606

1607
  @param lu: calling logical unit
1608
  @param additional_nodes: list of nodes not in the config to distribute to
1609

1610
  """
1611
  # 1. Gather target nodes
1612
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1613
  dist_nodes = lu.cfg.GetNodeList()
1614
  if additional_nodes is not None:
1615
    dist_nodes.extend(additional_nodes)
1616
  if myself.name in dist_nodes:
1617
    dist_nodes.remove(myself.name)
1618
  # 2. Gather files to distribute
1619
  dist_files = set([constants.ETC_HOSTS,
1620
                    constants.SSH_KNOWN_HOSTS_FILE,
1621
                    constants.RAPI_CERT_FILE,
1622
                    constants.RAPI_USERS_FILE,
1623
                   ])
1624

    
1625
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1626
  for hv_name in enabled_hypervisors:
1627
    hv_class = hypervisor.GetHypervisor(hv_name)
1628
    dist_files.update(hv_class.GetAncillaryFiles())
1629

    
1630
  # 3. Perform the files upload
1631
  for fname in dist_files:
1632
    if os.path.exists(fname):
1633
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1634
      for to_node, to_result in result.items():
1635
        msg = to_result.RemoteFailMsg()
1636
        if msg:
1637
          msg = ("Copy of file %s to node %s failed: %s" %
1638
                 (fname, to_node, msg))
1639
          lu.proc.LogWarning(msg)
1640

    
1641

    
1642
class LURedistributeConfig(NoHooksLU):
1643
  """Force the redistribution of cluster configuration.
1644

1645
  This is a very simple LU.
1646

1647
  """
1648
  _OP_REQP = []
1649
  REQ_BGL = False
1650

    
1651
  def ExpandNames(self):
1652
    self.needed_locks = {
1653
      locking.LEVEL_NODE: locking.ALL_SET,
1654
    }
1655
    self.share_locks[locking.LEVEL_NODE] = 1
1656

    
1657
  def CheckPrereq(self):
1658
    """Check prerequisites.
1659

1660
    """
1661

    
1662
  def Exec(self, feedback_fn):
1663
    """Redistribute the configuration.
1664

1665
    """
1666
    self.cfg.Update(self.cfg.GetClusterInfo())
1667
    _RedistributeAncillaryFiles(self)
1668

    
1669

    
1670
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1671
  """Sleep and poll for an instance's disk to sync.
1672

1673
  """
1674
  if not instance.disks:
1675
    return True
1676

    
1677
  if not oneshot:
1678
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1679

    
1680
  node = instance.primary_node
1681

    
1682
  for dev in instance.disks:
1683
    lu.cfg.SetDiskID(dev, node)
1684

    
1685
  retries = 0
1686
  while True:
1687
    max_time = 0
1688
    done = True
1689
    cumul_degraded = False
1690
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1691
    msg = rstats.RemoteFailMsg()
1692
    if msg:
1693
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1694
      retries += 1
1695
      if retries >= 10:
1696
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1697
                                 " aborting." % node)
1698
      time.sleep(6)
1699
      continue
1700
    rstats = rstats.payload
1701
    retries = 0
1702
    for i, mstat in enumerate(rstats):
1703
      if mstat is None:
1704
        lu.LogWarning("Can't compute data for node %s/%s",
1705
                      node, instance.disks[i].iv_name)
1706
        continue
1707
      # we ignore the ldisk parameter
1708
      perc_done, est_time, is_degraded, _ = mstat
1709
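      # a device only counts towards the cumulative degraded state if it is
      # degraded while no resync is in progress (no percentage reported)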
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1710
      if perc_done is not None:
1711
        done = False
1712
        if est_time is not None:
1713
          rem_time = "%d estimated seconds remaining" % est_time
1714
          max_time = est_time
1715
        else:
1716
          rem_time = "no time estimate"
1717
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1718
                        (instance.disks[i].iv_name, perc_done, rem_time))
1719
    if done or oneshot:
1720
      break
1721

    
1722
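    # wait before polling again: at most 60 seconds, or less if the largest
    # estimated sync time reported is shorter (0 if no estimate was given)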
    time.sleep(min(60, max_time))
1723

    
1724
  if done:
1725
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1726
  return not cumul_degraded
1727

    
1728

    
1729
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1730
  """Check that mirrors are not degraded.
1731

1732
  The ldisk parameter, if True, will change the test from the
1733
  is_degraded attribute (which represents overall non-ok status for
1734
  the device(s)) to the ldisk (representing the local storage status).
1735

1736
  """
1737
  lu.cfg.SetDiskID(dev, node)
1738
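  # index into the status tuple returned by blockdev_find: element 5 is the
  # overall is_degraded flag, element 6 the local storage (ldisk) status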
  if ldisk:
1739
    idx = 6
1740
  else:
1741
    idx = 5
1742

    
1743
  result = True
1744
  if on_primary or dev.AssembleOnSecondary():
1745
    rstats = lu.rpc.call_blockdev_find(node, dev)
1746
    msg = rstats.RemoteFailMsg()
1747
    if msg:
1748
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1749
      result = False
1750
    elif not rstats.payload:
1751
      lu.LogWarning("Can't find disk on node %s", node)
1752
      result = False
1753
    else:
1754
      result = result and (not rstats.payload[idx])
1755
  if dev.children:
1756
    for child in dev.children:
1757
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1758

    
1759
  return result
1760

    
1761

    
1762
class LUDiagnoseOS(NoHooksLU):
1763
  """Logical unit for OS diagnose/query.
1764

1765
  """
1766
  _OP_REQP = ["output_fields", "names"]
1767
  REQ_BGL = False
1768
  _FIELDS_STATIC = utils.FieldSet()
1769
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1770

    
1771
  def ExpandNames(self):
1772
    if self.op.names:
1773
      raise errors.OpPrereqError("Selective OS query not supported")
1774

    
1775
    _CheckOutputFields(static=self._FIELDS_STATIC,
1776
                       dynamic=self._FIELDS_DYNAMIC,
1777
                       selected=self.op.output_fields)
1778

    
1779
    # Lock all nodes, in shared mode
1780
    # Temporary removal of locks, should be reverted later
1781
    # TODO: reintroduce locks when they are lighter-weight
1782
    self.needed_locks = {}
1783
    #self.share_locks[locking.LEVEL_NODE] = 1
1784
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1785

    
1786
  def CheckPrereq(self):
1787
    """Check prerequisites.
1788

1789
    """
1790

    
1791
  @staticmethod
1792
  def _DiagnoseByOS(node_list, rlist):
1793
    """Remaps a per-node return list into an a per-os per-node dictionary
1794

1795
    @param node_list: a list with the names of all nodes
1796
    @param rlist: a map with node names as keys and OS objects as values
1797

1798
    @rtype: dict
1799
    @return: a dictionary with osnames as keys and as value another map, with
1800
        nodes as keys and list of OS objects as values, eg::
1801

1802
          {"debian-etch": {"node1": [<object>,...],
1803
                           "node2": [<object>,]}
1804
          }
1805

1806
    """
1807
    all_os = {}
1808
    # we build here the list of nodes that didn't fail the RPC (at RPC
1809
    # level), so that nodes with a non-responding node daemon don't
1810
    # make all OSes invalid
1811
    good_nodes = [node_name for node_name in rlist
1812
                  if not rlist[node_name].failed]
1813
    for node_name, nr in rlist.iteritems():
1814
      if nr.failed or not nr.data:
1815
        continue
1816
      for os_obj in nr.data:
1817
        if os_obj.name not in all_os:
1818
          # build a list of nodes for this os containing empty lists
1819
          # for each node in node_list
1820
          all_os[os_obj.name] = {}
1821
          for nname in good_nodes:
1822
            all_os[os_obj.name][nname] = []
1823
        all_os[os_obj.name][node_name].append(os_obj)
1824
    return all_os
1825

    
1826
  def Exec(self, feedback_fn):
1827
    """Compute the list of OSes.
1828

1829
    """
1830
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
1831
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1832
    if node_data == False:
1833
      raise errors.OpExecError("Can't gather the list of OSes")
1834
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1835
    output = []
1836
    for os_name, os_data in pol.iteritems():
1837
      row = []
1838
      for field in self.op.output_fields:
1839
        if field == "name":
1840
          val = os_name
1841
        elif field == "valid":
1842
          val = utils.all([osl and osl[0] for osl in os_data.values()])
1843
        elif field == "node_status":
1844
          val = {}
1845
          for node_name, nos_list in os_data.iteritems():
1846
            val[node_name] = [(v.status, v.path) for v in nos_list]
1847
        else:
1848
          raise errors.ParameterError(field)
1849
        row.append(val)
1850
      output.append(row)
1851

    
1852
    return output
1853

    
1854

    
1855
class LURemoveNode(LogicalUnit):
1856
  """Logical unit for removing a node.
1857

1858
  """
1859
  HPATH = "node-remove"
1860
  HTYPE = constants.HTYPE_NODE
1861
  _OP_REQP = ["node_name"]
1862

    
1863
  def BuildHooksEnv(self):
1864
    """Build hooks env.
1865

1866
    This doesn't run on the target node in the pre phase as a failed
1867
    node would then be impossible to remove.
1868

1869
    """
1870
    env = {
1871
      "OP_TARGET": self.op.node_name,
1872
      "NODE_NAME": self.op.node_name,
1873
      }
1874
    all_nodes = self.cfg.GetNodeList()
1875
    all_nodes.remove(self.op.node_name)
1876
    return env, all_nodes, all_nodes
1877

    
1878
  def CheckPrereq(self):
1879
    """Check prerequisites.
1880

1881
    This checks:
1882
     - the node exists in the configuration
1883
     - it does not have primary or secondary instances
1884
     - it's not the master
1885

1886
    Any errors are signalled by raising errors.OpPrereqError.
1887

1888
    """
1889
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1890
    if node is None:
1891
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1892

    
1893
    instance_list = self.cfg.GetInstanceList()
1894

    
1895
    masternode = self.cfg.GetMasterNode()
1896
    if node.name == masternode:
1897
      raise errors.OpPrereqError("Node is the master node,"
1898
                                 " you need to failover first.")
1899

    
1900
    for instance_name in instance_list:
1901
      instance = self.cfg.GetInstanceInfo(instance_name)
1902
      if node.name in instance.all_nodes:
1903
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1904
                                   " please remove first." % instance_name)
1905
    self.op.node_name = node.name
1906
    self.node = node
1907

    
1908
  def Exec(self, feedback_fn):
1909
    """Removes the node from the cluster.
1910

1911
    """
1912
    node = self.node
1913
    logging.info("Stopping the node daemon and removing configs from node %s",
1914
                 node.name)
1915

    
1916
    self.context.RemoveNode(node.name)
1917

    
1918
    self.rpc.call_node_leave_cluster(node.name)
1919

    
1920
    # Promote nodes to master candidate as needed
1921
    _AdjustCandidatePool(self)
1922

    
1923

    
1924
class LUQueryNodes(NoHooksLU):
1925
  """Logical unit for querying nodes.
1926

1927
  """
1928
  _OP_REQP = ["output_fields", "names", "use_locking"]
1929
  REQ_BGL = False
1930
  _FIELDS_DYNAMIC = utils.FieldSet(
1931
    "dtotal", "dfree",
1932
    "mtotal", "mnode", "mfree",
1933
    "bootid",
1934
    "ctotal", "cnodes", "csockets",
1935
    )
1936

    
1937
  _FIELDS_STATIC = utils.FieldSet(
1938
    "name", "pinst_cnt", "sinst_cnt",
1939
    "pinst_list", "sinst_list",
1940
    "pip", "sip", "tags",
1941
    "serial_no",
1942
    "master_candidate",
1943
    "master",
1944
    "offline",
1945
    "drained",
1946
    )
1947

    
1948
  def ExpandNames(self):
1949
    _CheckOutputFields(static=self._FIELDS_STATIC,
1950
                       dynamic=self._FIELDS_DYNAMIC,
1951
                       selected=self.op.output_fields)
1952

    
1953
    self.needed_locks = {}
1954
    self.share_locks[locking.LEVEL_NODE] = 1
1955

    
1956
    if self.op.names:
1957
      self.wanted = _GetWantedNodes(self, self.op.names)
1958
    else:
1959
      self.wanted = locking.ALL_SET
1960

    
1961
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1962
    self.do_locking = self.do_node_query and self.op.use_locking
1963
    if self.do_locking:
1964
      # if we don't request only static fields, we need to lock the nodes
1965
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1966

    
1967

    
1968
  def CheckPrereq(self):
1969
    """Check prerequisites.
1970

1971
    """
1972
    # The validation of the node list is done in the _GetWantedNodes,
1973
    # if non empty, and if empty, there's no validation to do
1974
    pass
1975

    
1976
  def Exec(self, feedback_fn):
1977
    """Computes the list of nodes and their attributes.
1978

1979
    """
1980
    all_info = self.cfg.GetAllNodesInfo()
1981
    if self.do_locking:
1982
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
1983
    elif self.wanted != locking.ALL_SET:
1984
      nodenames = self.wanted
1985
      missing = set(nodenames).difference(all_info.keys())
1986
      if missing:
1987
        raise errors.OpExecError(
1988
          "Some nodes were removed before retrieving their data: %s" % missing)
1989
    else:
1990
      nodenames = all_info.keys()
1991

    
1992
    nodenames = utils.NiceSort(nodenames)
1993
    nodelist = [all_info[name] for name in nodenames]
1994

    
1995
    # begin data gathering
1996

    
1997
    if self.do_node_query:
1998
      live_data = {}
1999
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2000
                                          self.cfg.GetHypervisorType())
2001
      for name in nodenames:
2002
        nodeinfo = node_data[name]
2003
        if not nodeinfo.RemoteFailMsg() and nodeinfo.payload:
2004
          nodeinfo = nodeinfo.payload
2005
          fn = utils.TryConvert
2006
          live_data[name] = {
2007
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2008
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2009
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2010
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2011
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2012
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2013
            "bootid": nodeinfo.get('bootid', None),
2014
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2015
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2016
            }
2017
        else:
2018
          live_data[name] = {}
2019
    else:
2020
      live_data = dict.fromkeys(nodenames, {})
2021

    
2022
    node_to_primary = dict([(name, set()) for name in nodenames])
2023
    node_to_secondary = dict([(name, set()) for name in nodenames])
2024

    
2025
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2026
                             "sinst_cnt", "sinst_list"))
2027
    if inst_fields & frozenset(self.op.output_fields):
2028
      instancelist = self.cfg.GetInstanceList()
2029

    
2030
      for instance_name in instancelist:
2031
        inst = self.cfg.GetInstanceInfo(instance_name)
2032
        if inst.primary_node in node_to_primary:
2033
          node_to_primary[inst.primary_node].add(inst.name)
2034
        for secnode in inst.secondary_nodes:
2035
          if secnode in node_to_secondary:
2036
            node_to_secondary[secnode].add(inst.name)
2037

    
2038
    master_node = self.cfg.GetMasterNode()
2039

    
2040
    # end data gathering
2041

    
2042
    output = []
2043
    for node in nodelist:
2044
      node_output = []
2045
      for field in self.op.output_fields:
2046
        if field == "name":
2047
          val = node.name
2048
        elif field == "pinst_list":
2049
          val = list(node_to_primary[node.name])
2050
        elif field == "sinst_list":
2051
          val = list(node_to_secondary[node.name])
2052
        elif field == "pinst_cnt":
2053
          val = len(node_to_primary[node.name])
2054
        elif field == "sinst_cnt":
2055
          val = len(node_to_secondary[node.name])
2056
        elif field == "pip":
2057
          val = node.primary_ip
2058
        elif field == "sip":
2059
          val = node.secondary_ip
2060
        elif field == "tags":
2061
          val = list(node.GetTags())
2062
        elif field == "serial_no":
2063
          val = node.serial_no
2064
        elif field == "master_candidate":
2065
          val = node.master_candidate
2066
        elif field == "master":
2067
          val = node.name == master_node
2068
        elif field == "offline":
2069
          val = node.offline
2070
        elif field == "drained":
2071
          val = node.drained
2072
        elif self._FIELDS_DYNAMIC.Matches(field):
2073
          val = live_data[node.name].get(field, None)
2074
        else:
2075
          raise errors.ParameterError(field)
2076
        node_output.append(val)
2077
      output.append(node_output)
2078

    
2079
    return output
2080

    
2081

    
2082
class LUQueryNodeVolumes(NoHooksLU):
2083
  """Logical unit for getting volumes on node(s).
2084

2085
  """
2086
  _OP_REQP = ["nodes", "output_fields"]
2087
  REQ_BGL = False
2088
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2089
  _FIELDS_STATIC = utils.FieldSet("node")
2090

    
2091
  def ExpandNames(self):
2092
    _CheckOutputFields(static=self._FIELDS_STATIC,
2093
                       dynamic=self._FIELDS_DYNAMIC,
2094
                       selected=self.op.output_fields)
2095

    
2096
    self.needed_locks = {}
2097
    self.share_locks[locking.LEVEL_NODE] = 1
2098
    if not self.op.nodes:
2099
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2100
    else:
2101
      self.needed_locks[locking.LEVEL_NODE] = \
2102
        _GetWantedNodes(self, self.op.nodes)
2103

    
2104
  def CheckPrereq(self):
2105
    """Check prerequisites.
2106

2107
    This checks that the fields required are valid output fields.
2108

2109
    """
2110
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2111

    
2112
  def Exec(self, feedback_fn):
2113
    """Computes the list of nodes and their attributes.
2114

2115
    """
2116
    nodenames = self.nodes
2117
    volumes = self.rpc.call_node_volumes(nodenames)
2118

    
2119
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2120
             in self.cfg.GetInstanceList()]
2121

    
2122
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2123

    
2124
    output = []
2125
    for node in nodenames:
2126
      if node not in volumes or volumes[node].failed or not volumes[node].data:
2127
        continue
2128

    
2129
      node_vols = volumes[node].data[:]
2130
      node_vols.sort(key=lambda vol: vol['dev'])
2131

    
2132
      for vol in node_vols:
2133
        node_output = []
2134
        for field in self.op.output_fields:
2135
          if field == "node":
2136
            val = node
2137
          elif field == "phys":
2138
            val = vol['dev']
2139
          elif field == "vg":
2140
            val = vol['vg']
2141
          elif field == "name":
2142
            val = vol['name']
2143
          elif field == "size":
2144
            val = int(float(vol['size']))
2145
          elif field == "instance":
2146
            for inst in ilist:
2147
              if node not in lv_by_node[inst]:
2148
                continue
2149
              if vol['name'] in lv_by_node[inst][node]:
2150
                val = inst.name
2151
                break
2152
            else:
2153
              val = '-'
2154
          else:
2155
            raise errors.ParameterError(field)
2156
          node_output.append(str(val))
2157

    
2158
        output.append(node_output)
2159

    
2160
    return output
2161

    
2162

    
2163
class LUAddNode(LogicalUnit):
2164
  """Logical unit for adding node to the cluster.
2165

2166
  """
2167
  HPATH = "node-add"
2168
  HTYPE = constants.HTYPE_NODE
2169
  _OP_REQP = ["node_name"]
2170

    
2171
  def BuildHooksEnv(self):
2172
    """Build hooks env.
2173

2174
    This will run on all nodes before, and on all nodes + the new node after.
2175

2176
    """
2177
    env = {
2178
      "OP_TARGET": self.op.node_name,
2179
      "NODE_NAME": self.op.node_name,
2180
      "NODE_PIP": self.op.primary_ip,
2181
      "NODE_SIP": self.op.secondary_ip,
2182
      }
2183
    nodes_0 = self.cfg.GetNodeList()
2184
    nodes_1 = nodes_0 + [self.op.node_name, ]
2185
    return env, nodes_0, nodes_1
2186

    
2187
  def CheckPrereq(self):
2188
    """Check prerequisites.
2189

2190
    This checks:
2191
     - the new node is not already in the config
2192
     - it is resolvable
2193
     - its parameters (single/dual homed) match the cluster
2194

2195
    Any errors are signalled by raising errors.OpPrereqError.
2196

2197
    """
2198
    node_name = self.op.node_name
2199
    cfg = self.cfg
2200

    
2201
    dns_data = utils.HostInfo(node_name)
2202

    
2203
    node = dns_data.name
2204
    primary_ip = self.op.primary_ip = dns_data.ip
2205
    secondary_ip = getattr(self.op, "secondary_ip", None)
2206
    if secondary_ip is None:
2207
      secondary_ip = primary_ip
2208
    if not utils.IsValidIP(secondary_ip):
2209
      raise errors.OpPrereqError("Invalid secondary IP given")
2210
    self.op.secondary_ip = secondary_ip
2211

    
2212
    node_list = cfg.GetNodeList()
2213
    if not self.op.readd and node in node_list:
2214
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2215
                                 node)
2216
    elif self.op.readd and node not in node_list:
2217
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2218

    
2219
    for existing_node_name in node_list:
2220
      existing_node = cfg.GetNodeInfo(existing_node_name)
2221

    
2222
      if self.op.readd and node == existing_node_name:
2223
        if (existing_node.primary_ip != primary_ip or
2224
            existing_node.secondary_ip != secondary_ip):
2225
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2226
                                     " address configuration as before")
2227
        continue
2228

    
2229
      if (existing_node.primary_ip == primary_ip or
2230
          existing_node.secondary_ip == primary_ip or
2231
          existing_node.primary_ip == secondary_ip or
2232
          existing_node.secondary_ip == secondary_ip):
2233
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2234
                                   " existing node %s" % existing_node.name)
2235

    
2236
    # check that the type of the node (single versus dual homed) is the
2237
    # same as for the master
2238
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2239
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2240
    newbie_singlehomed = secondary_ip == primary_ip
2241
    if master_singlehomed != newbie_singlehomed:
2242
      if master_singlehomed:
2243
        raise errors.OpPrereqError("The master has no private ip but the"
2244
                                   " new node has one")
2245
      else:
2246
        raise errors.OpPrereqError("The master has a private ip but the"
2247
                                   " new node doesn't have one")
2248

    
2249
    # checks reachability
2250
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2251
      raise errors.OpPrereqError("Node not reachable by ping")
2252

    
2253
    if not newbie_singlehomed:
2254
      # check reachability from my secondary ip to newbie's secondary ip
2255
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2256
                           source=myself.secondary_ip):
2257
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2258
                                   " based ping to noded port")
2259

    
2260
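    # auto-promote the new node to master candidate if the candidate pool
    # is not yet full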
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2261
    mc_now, _ = self.cfg.GetMasterCandidateStats()
2262
    master_candidate = mc_now < cp_size
2263

    
2264
    self.new_node = objects.Node(name=node,
2265
                                 primary_ip=primary_ip,
2266
                                 secondary_ip=secondary_ip,
2267
                                 master_candidate=master_candidate,
2268
                                 offline=False, drained=False)
2269

    
2270
  def Exec(self, feedback_fn):
2271
    """Adds the new node to the cluster.
2272

2273
    """
2274
    new_node = self.new_node
2275
    node = new_node.name
2276

    
2277
    # check connectivity
2278
    result = self.rpc.call_version([node])[node]
2279
    result.Raise()
2280
    if result.data:
2281
      if constants.PROTOCOL_VERSION == result.data:
2282
        logging.info("Communication to node %s fine, sw version %s match",
2283
                     node, result.data)
2284
      else:
2285
        raise errors.OpExecError("Version mismatch master version %s,"
2286
                                 " node version %s" %
2287
                                 (constants.PROTOCOL_VERSION, result.data))
2288
    else:
2289
      raise errors.OpExecError("Cannot get version from the new node")
2290

    
2291
    # setup ssh on node
2292
    logging.info("Copy ssh key to node %s", node)
2293
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2294
    keyarray = []
2295
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2296
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2297
                priv_key, pub_key]
2298

    
2299
    for i in keyfiles:
2300
      f = open(i, 'r')
2301
      try:
2302
        keyarray.append(f.read())
2303
      finally:
2304
        f.close()
2305

    
2306
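    # the key material is passed positionally, in the same order as the
    # keyfiles list above (DSA host key pair, RSA host key pair, and the
    # Ganeti ssh user's key pair)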
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2307
                                    keyarray[2],
2308
                                    keyarray[3], keyarray[4], keyarray[5])
2309

    
2310
    msg = result.RemoteFailMsg()
2311
    if msg:
2312
      raise errors.OpExecError("Cannot transfer ssh keys to the"
2313
                               " new node: %s" % msg)
2314

    
2315
    # Add node to our /etc/hosts, and add key to known_hosts
2316
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2317
      utils.AddHostToEtcHosts(new_node.name)
2318

    
2319
    if new_node.secondary_ip != new_node.primary_ip:
2320
      result = self.rpc.call_node_has_ip_address(new_node.name,
2321
                                                 new_node.secondary_ip)
2322
      msg = result.RemoteFailMsg()
2323
      if msg:
2324
        raise errors.OpPrereqError("Failure checking secondary ip"
2325
                                   " on node %s: %s" % (new_node.name, msg))
2326
      if not result.payload:
2327
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2328
                                 " you gave (%s). Please fix and re-run this"
2329
                                 " command." % new_node.secondary_ip)
2330

    
2331
    node_verify_list = [self.cfg.GetMasterNode()]
2332
    node_verify_param = {
2333
      'nodelist': [node],
2334
      # TODO: do a node-net-test as well?
2335
    }
2336

    
2337
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2338
                                       self.cfg.GetClusterName())
2339
    for verifier in node_verify_list:
2340
      msg = result[verifier].RemoteFailMsg()
2341
      if msg:
2342
        raise errors.OpExecError("Cannot communicate with node %s: %s" %
2343
                                 (verifier, msg))
2344
      nl_payload = result[verifier].payload['nodelist']
2345
      if nl_payload:
2346
        for failed in nl_payload:
2347
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2348
                      (verifier, nl_payload[failed]))
2349
        raise errors.OpExecError("ssh/hostname verification failed.")
2350

    
2351
    if self.op.readd:
2352
      _RedistributeAncillaryFiles(self)
2353
      self.context.ReaddNode(new_node)
2354
    else:
2355
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2356
      self.context.AddNode(new_node)
2357

    
2358

    
2359
class LUSetNodeParams(LogicalUnit):
2360
  """Modifies the parameters of a node.
2361

2362
  """
2363
  HPATH = "node-modify"
2364
  HTYPE = constants.HTYPE_NODE
2365
  _OP_REQP = ["node_name"]
2366
  REQ_BGL = False
2367

    
2368
  def CheckArguments(self):
2369
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2370
    if node_name is None:
2371
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2372
    self.op.node_name = node_name
2373
    _CheckBooleanOpField(self.op, 'master_candidate')
2374
    _CheckBooleanOpField(self.op, 'offline')
2375
    _CheckBooleanOpField(self.op, 'drained')
2376
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2377
    if all_mods.count(None) == 3:
2378
      raise errors.OpPrereqError("Please pass at least one modification")
2379
    if all_mods.count(True) > 1:
2380
      raise errors.OpPrereqError("Can't set the node into more than one"
2381
                                 " state at the same time")
2382

    
2383
  def ExpandNames(self):
2384
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2385

    
2386
  def BuildHooksEnv(self):
2387
    """Build hooks env.
2388

2389
    This runs on the master node.
2390

2391
    """
2392
    env = {
2393
      "OP_TARGET": self.op.node_name,
2394
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2395
      "OFFLINE": str(self.op.offline),
2396
      "DRAINED": str(self.op.drained),
2397
      }
2398
    nl = [self.cfg.GetMasterNode(),
2399
          self.op.node_name]
2400
    return env, nl, nl
2401

    
2402
  def CheckPrereq(self):
2403
    """Check prerequisites.
2404

2405
    This checks that the requested flag changes are allowed for this node.
2406

2407
    """
2408
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2409

    
2410
    if ((self.op.master_candidate == False or self.op.offline == True or
2411
         self.op.drained == True) and node.master_candidate):
2412
      # we will demote the node from master_candidate
2413
      if self.op.node_name == self.cfg.GetMasterNode():
2414
        raise errors.OpPrereqError("The master node has to be a"
2415
                                   " master candidate, online and not drained")
2416
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2417
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2418
      if num_candidates <= cp_size:
2419
        msg = ("Not enough master candidates (desired"
2420
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2421
        if self.op.force:
2422
          self.LogWarning(msg)
2423
        else:
2424
          raise errors.OpPrereqError(msg)
2425

    
2426
    if (self.op.master_candidate == True and
2427
        ((node.offline and not self.op.offline == False) or
2428
         (node.drained and not self.op.drained == False))):
2429
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2430
                                 " to master_candidate" % node.name)
2431

    
2432
    return
2433

    
2434
  def Exec(self, feedback_fn):
2435
    """Modifies a node.
2436

2437
    """
2438
    node = self.node
2439

    
2440
    result = []
2441
    changed_mc = False
2442

    
2443
    if self.op.offline is not None:
2444
      node.offline = self.op.offline
2445
      result.append(("offline", str(self.op.offline)))
2446
      if self.op.offline == True:
2447
        if node.master_candidate:
2448
          node.master_candidate = False
2449
          changed_mc = True
2450
          result.append(("master_candidate", "auto-demotion due to offline"))
2451
        if node.drained:
2452
          node.drained = False
2453
          result.append(("drained", "clear drained status due to offline"))
2454

    
2455
    if self.op.master_candidate is not None:
2456
      node.master_candidate = self.op.master_candidate
2457
      changed_mc = True
2458
      result.append(("master_candidate", str(self.op.master_candidate)))
2459
      if self.op.master_candidate == False:
2460
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2461
        msg = rrc.RemoteFailMsg()
2462
        if msg:
2463
          self.LogWarning("Node failed to demote itself: %s" % msg)
2464

    
2465
    if self.op.drained is not None:
2466
      node.drained = self.op.drained
2467
      result.append(("drained", str(self.op.drained)))
2468
      if self.op.drained == True:
2469
        if node.master_candidate:
2470
          node.master_candidate = False
2471
          changed_mc = True
2472
          result.append(("master_candidate", "auto-demotion due to drain"))
2473
        if node.offline:
2474
          node.offline = False
2475
          result.append(("offline", "clear offline status due to drain"))
2476

    
2477
    # this will trigger configuration file update, if needed
2478
    self.cfg.Update(node)
2479
    # this will trigger job queue propagation or cleanup
2480
    if changed_mc:
2481
      self.context.ReaddNode(node)
2482

    
2483
    return result
2484

    
2485

    
2486
class LUPowercycleNode(NoHooksLU):
2487
  """Powercycles a node.
2488

2489
  """
2490
  _OP_REQP = ["node_name", "force"]
2491
  REQ_BGL = False
2492

    
2493
  def CheckArguments(self):
2494
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2495
    if node_name is None:
2496
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2497
    self.op.node_name = node_name
2498
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2499
      raise errors.OpPrereqError("The node is the master and the force"
2500
                                 " parameter was not set")
2501

    
2502
  def ExpandNames(self):
2503
    """Locking for PowercycleNode.
2504

2505
    This is a last-resort option and shouldn't block on other
2506
    jobs. Therefore, we grab no locks.
2507

2508
    """
2509
    self.needed_locks = {}
2510

    
2511
  def CheckPrereq(self):
2512
    """Check prerequisites.
2513

2514
    This LU has no prereqs.
2515

2516
    """
2517
    pass
2518

    
2519
  def Exec(self, feedback_fn):
2520
    """Reboots a node.
2521

2522
    """
2523
    result = self.rpc.call_node_powercycle(self.op.node_name,
2524
                                           self.cfg.GetHypervisorType())
2525
    msg = result.RemoteFailMsg()
2526
    if msg:
2527
      raise errors.OpExecError("Failed to schedule the reboot: %s" % msg)
2528
    return result.payload
2529

    
2530

    
2531
class LUQueryClusterInfo(NoHooksLU):
2532
  """Query cluster configuration.
2533

2534
  """
2535
  _OP_REQP = []
2536
  REQ_BGL = False
2537

    
2538
  def ExpandNames(self):
2539
    self.needed_locks = {}
2540

    
2541
  def CheckPrereq(self):
2542
    """No prerequsites needed for this LU.
2543

2544
    """
2545
    pass
2546

    
2547
  def Exec(self, feedback_fn):
2548
    """Return cluster config.
2549

2550
    """
2551
    cluster = self.cfg.GetClusterInfo()
2552
    result = {
2553
      "software_version": constants.RELEASE_VERSION,
2554
      "protocol_version": constants.PROTOCOL_VERSION,
2555
      "config_version": constants.CONFIG_VERSION,
2556
      "os_api_version": constants.OS_API_VERSION,
2557
      "export_version": constants.EXPORT_VERSION,
2558
      "architecture": (platform.architecture()[0], platform.machine()),
2559
      "name": cluster.cluster_name,
2560
      "master": cluster.master_node,
2561
      "default_hypervisor": cluster.default_hypervisor,
2562
      "enabled_hypervisors": cluster.enabled_hypervisors,
2563
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
2564
                        for hypervisor in cluster.enabled_hypervisors]),
2565
      "beparams": cluster.beparams,
2566
      "nicparams": cluster.nicparams,
2567
      "candidate_pool_size": cluster.candidate_pool_size,
2568
      "master_netdev": cluster.master_netdev,
2569
      "volume_group_name": cluster.volume_group_name,
2570
      "file_storage_dir": cluster.file_storage_dir,
2571
      }
2572

    
2573
    return result
2574

    
2575

    
2576
class LUQueryConfigValues(NoHooksLU):
2577
  """Return configuration values.
2578

2579
  """
2580
  _OP_REQP = []
2581
  REQ_BGL = False
2582
  _FIELDS_DYNAMIC = utils.FieldSet()
2583
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2584

    
2585
  def ExpandNames(self):
2586
    self.needed_locks = {}
2587

    
2588
    _CheckOutputFields(static=self._FIELDS_STATIC,
2589
                       dynamic=self._FIELDS_DYNAMIC,
2590
                       selected=self.op.output_fields)
2591

    
2592
  def CheckPrereq(self):
2593
    """No prerequisites.
2594

2595
    """
2596
    pass
2597

    
2598
  def Exec(self, feedback_fn):
2599
    """Dump a representation of the cluster config to the standard output.
2600

2601
    """
2602
    values = []
2603
    for field in self.op.output_fields:
2604
      if field == "cluster_name":
2605
        entry = self.cfg.GetClusterName()
2606
      elif field == "master_node":
2607
        entry = self.cfg.GetMasterNode()
2608
      elif field == "drain_flag":
2609
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2610
      else:
2611
        raise errors.ParameterError(field)
2612
      values.append(entry)
2613
    return values
2614

    
2615

    
2616
class LUActivateInstanceDisks(NoHooksLU):
2617
  """Bring up an instance's disks.
2618

2619
  """
2620
  _OP_REQP = ["instance_name"]
2621
  REQ_BGL = False
2622

    
2623
  def ExpandNames(self):
2624
    self._ExpandAndLockInstance()
2625
    self.needed_locks[locking.LEVEL_NODE] = []
2626
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2627

    
2628
  def DeclareLocks(self, level):
2629
    if level == locking.LEVEL_NODE:
2630
      self._LockInstancesNodes()
2631

    
2632
  def CheckPrereq(self):
2633
    """Check prerequisites.
2634

2635
    This checks that the instance is in the cluster.
2636

2637
    """
2638
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2639
    assert self.instance is not None, \
2640
      "Cannot retrieve locked instance %s" % self.op.instance_name
2641
    _CheckNodeOnline(self, self.instance.primary_node)
2642

    
2643
  def Exec(self, feedback_fn):
2644
    """Activate the disks.
2645

2646
    """
2647
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2648
    if not disks_ok:
2649
      raise errors.OpExecError("Cannot activate block devices")
2650

    
2651
    return disks_info
2652

    
2653

    
2654
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2655
  """Prepare the block devices for an instance.
2656

2657
  This sets up the block devices on all nodes.
2658

2659
  @type lu: L{LogicalUnit}
2660
  @param lu: the logical unit on whose behalf we execute
2661
  @type instance: L{objects.Instance}
2662
  @param instance: the instance for whose disks we assemble
2663
  @type ignore_secondaries: boolean
2664
  @param ignore_secondaries: if true, errors on secondary nodes
2665
      won't result in an error return from the function
2666
  @return: False if the operation failed, otherwise a list of
2667
      (host, instance_visible_name, node_visible_name)
2668
      with the mapping from node devices to instance devices
2669

2670
  """
2671
  device_info = []
2672
  disks_ok = True
2673
  iname = instance.name
2674
  # With the two-pass mechanism we try to reduce the window of
2675
  # opportunity for the race condition of switching DRBD to primary
2676
  # before handshaking occurred, but we do not eliminate it
2677

    
2678
  # The proper fix would be to wait (with some limits) until the
2679
  # connection has been made and drbd transitions from WFConnection
2680
  # into any other network-connected state (Connected, SyncTarget,
2681
  # SyncSource, etc.)
2682

    
2683
  # 1st pass, assemble on all nodes in secondary mode
2684
  for inst_disk in instance.disks:
2685
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2686
      lu.cfg.SetDiskID(node_disk, node)
2687
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2688
      msg = result.RemoteFailMsg()
2689
      if msg:
2690
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2691
                           " (is_primary=False, pass=1): %s",
2692
                           inst_disk.iv_name, node, msg)
2693
        if not ignore_secondaries:
2694
          disks_ok = False
2695

    
2696
  # FIXME: race condition on drbd migration to primary
2697

    
2698
  # 2nd pass, do only the primary node
2699
  for inst_disk in instance.disks:
2700
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2701
      if node != instance.primary_node:
2702
        continue
2703
      lu.cfg.SetDiskID(node_disk, node)
2704
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2705
      msg = result.RemoteFailMsg()
2706
      if msg:
2707
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2708
                           " (is_primary=True, pass=2): %s",
2709
                           inst_disk.iv_name, node, msg)
2710
        disks_ok = False
2711
    device_info.append((instance.primary_node, inst_disk.iv_name,
2712
                        result.payload))
2713

    
2714
  # leave the disks configured for the primary node
2715
  # this is a workaround that would be fixed better by
2716
  # improving the logical/physical id handling
2717
  for disk in instance.disks:
2718
    lu.cfg.SetDiskID(disk, instance.primary_node)
2719

    
2720
  return disks_ok, device_info
2721

    
2722

    
2723
def _StartInstanceDisks(lu, instance, force):
2724
  """Start the disks of an instance.
2725

2726
  """
2727
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2728
                                           ignore_secondaries=force)
2729
  if not disks_ok:
2730
    _ShutdownInstanceDisks(lu, instance)
2731
    if force is not None and not force:
2732
      lu.proc.LogWarning("", hint="If the message above refers to a"
2733
                         " secondary node,"
2734
                         " you can retry the operation using '--force'.")
2735
    raise errors.OpExecError("Disk consistency error")
2736

    
2737

    
2738
class LUDeactivateInstanceDisks(NoHooksLU):
2739
  """Shutdown an instance's disks.
2740

2741
  """
2742
  _OP_REQP = ["instance_name"]
2743
  REQ_BGL = False
2744

    
2745
  def ExpandNames(self):
2746
    self._ExpandAndLockInstance()
2747
    self.needed_locks[locking.LEVEL_NODE] = []
2748
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2749

    
2750
  def DeclareLocks(self, level):
2751
    if level == locking.LEVEL_NODE:
2752
      self._LockInstancesNodes()
2753

    
2754
  def CheckPrereq(self):
2755
    """Check prerequisites.
2756

2757
    This checks that the instance is in the cluster.
2758

2759
    """
2760
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2761
    assert self.instance is not None, \
2762
      "Cannot retrieve locked instance %s" % self.op.instance_name
2763

    
2764
  def Exec(self, feedback_fn):
2765
    """Deactivate the disks
2766

2767
    """
2768
    instance = self.instance
2769
    _SafeShutdownInstanceDisks(self, instance)
2770

    
2771

    
2772
def _SafeShutdownInstanceDisks(lu, instance):
2773
  """Shutdown block devices of an instance.
2774

2775
  This function checks if an instance is running, before calling
2776
  _ShutdownInstanceDisks.
2777

2778
  """
2779
  pnode = instance.primary_node
2780
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])
2781
  ins_l = ins_l[pnode]
2782
  msg = ins_l.RemoteFailMsg()
2783
  if msg:
2784
    raise errors.OpExecError("Can't contact node %s: %s" % (pnode, msg))
2785

    
2786
  if instance.name in ins_l.payload:
2787
    raise errors.OpExecError("Instance is running, can't shutdown"
2788
                             " block devices.")
2789

    
2790
  _ShutdownInstanceDisks(lu, instance)
2791

    
2792

    
2793
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2794
  """Shutdown block devices of an instance.
2795

2796
  This does the shutdown on all nodes of the instance.
2797

2798
  If ignore_primary is true, errors on the primary node are
2799
  ignored.
2800

2801
  """
2802
  all_result = True
2803
  for disk in instance.disks:
2804
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2805
      lu.cfg.SetDiskID(top_disk, node)
2806
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2807
      msg = result.RemoteFailMsg()
2808
      if msg:
2809
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2810
                      disk.iv_name, node, msg)
2811
        if not ignore_primary or node != instance.primary_node:
2812
          all_result = False
2813
  return all_result
2814

    
2815

    
2816
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2817
  """Checks if a node has enough free memory.
2818

2819
  This function checks if a given node has the needed amount of free
2820
  memory. In case the node has less memory or we cannot get the
2821
  information from the node, this function raises an OpPrereqError
2822
  exception.
2823

2824
  @type lu: C{LogicalUnit}
2825
  @param lu: a logical unit from which we get configuration data
2826
  @type node: C{str}
2827
  @param node: the node to check
2828
  @type reason: C{str}
2829
  @param reason: string to use in the error message
2830
  @type requested: C{int}
2831
  @param requested: the amount of memory in MiB to check for
2832
  @type hypervisor_name: C{str}
2833
  @param hypervisor_name: the hypervisor to ask for memory stats
2834
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2835
      we cannot check the node
2836

2837
  """
2838
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2839
  msg = nodeinfo[node].RemoteFailMsg()
2840
  if msg:
2841
    raise errors.OpPrereqError("Can't get data from node %s: %s" % (node, msg))
2842
  free_mem = nodeinfo[node].payload.get('memory_free', None)
2843
  if not isinstance(free_mem, int):
2844
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2845
                               " was '%s'" % (node, free_mem))
2846
  if requested > free_mem:
2847
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2848
                               " needed %s MiB, available %s MiB" %
2849
                               (node, reason, requested, free_mem))
2850

    
2851

    
2852
class LUStartupInstance(LogicalUnit):
2853
  """Starts an instance.
2854

2855
  """
2856
  HPATH = "instance-start"
2857
  HTYPE = constants.HTYPE_INSTANCE
2858
  _OP_REQP = ["instance_name", "force"]
2859
  REQ_BGL = False
2860

    
2861
  def ExpandNames(self):
2862
    self._ExpandAndLockInstance()
2863

    
2864
  def BuildHooksEnv(self):
2865
    """Build hooks env.
2866

2867
    This runs on master, primary and secondary nodes of the instance.
2868

2869
    """
2870
    env = {
2871
      "FORCE": self.op.force,
2872
      }
2873
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2874
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2875
    return env, nl, nl
2876

    
2877
  def CheckPrereq(self):
2878
    """Check prerequisites.
2879

2880
    This checks that the instance is in the cluster.
2881

2882
    """
2883
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2884
    assert self.instance is not None, \
2885
      "Cannot retrieve locked instance %s" % self.op.instance_name
2886

    
2887
    # extra beparams
2888
    self.beparams = getattr(self.op, "beparams", {})
2889
    if self.beparams:
2890
      if not isinstance(self.beparams, dict):
2891
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
2892
                                   " dict" % (type(self.beparams), ))
2893
      # fill the beparams dict
2894
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
2895
      self.op.beparams = self.beparams
2896

    
2897
    # extra hvparams
2898
    self.hvparams = getattr(self.op, "hvparams", {})
2899
    if self.hvparams:
2900
      if not isinstance(self.hvparams, dict):
2901
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
2902
                                   " dict" % (type(self.hvparams), ))
2903

    
2904
      # check hypervisor parameter syntax (locally)
2905
      cluster = self.cfg.GetClusterInfo()
2906
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
2907
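      # the effective hvparams are layered: cluster defaults for this
      # hypervisor, then the instance's own hvparams, then the one-off
      # overrides passed with this start request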
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
2908
                                    instance.hvparams)
2909
      filled_hvp.update(self.hvparams)
2910
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
2911
      hv_type.CheckParameterSyntax(filled_hvp)
2912
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
2913
      self.op.hvparams = self.hvparams
2914

    
2915
    _CheckNodeOnline(self, instance.primary_node)
2916

    
2917
    bep = self.cfg.GetClusterInfo().FillBE(instance)
2918
    # check bridges existence

2919
    _CheckInstanceBridgesExist(self, instance)
2920

    
2921
    remote_info = self.rpc.call_instance_info(instance.primary_node,
2922
                                              instance.name,
2923
                                              instance.hypervisor)
2924
    msg = remote_info.RemoteFailMsg()
2925
    if msg:
2926
      raise errors.OpPrereqError("Error checking node %s: %s" %
2927
                                 (instance.primary_node, msg))
2928
    if not remote_info.payload: # not running already
2929
      _CheckNodeFreeMemory(self, instance.primary_node,
2930
                           "starting instance %s" % instance.name,
2931
                           bep[constants.BE_MEMORY], instance.hypervisor)
2932

    
2933
  def Exec(self, feedback_fn):
2934
    """Start the instance.
2935

2936
    """
2937
    instance = self.instance
2938
    force = self.op.force
2939

    
2940
    self.cfg.MarkInstanceUp(instance.name)
2941

    
2942
    node_current = instance.primary_node
2943

    
2944
    _StartInstanceDisks(self, instance, force)
2945

    
2946
    result = self.rpc.call_instance_start(node_current, instance,
2947
                                          self.hvparams, self.beparams)
2948
    msg = result.RemoteFailMsg()
2949
    if msg:
2950
      _ShutdownInstanceDisks(self, instance)
2951
      raise errors.OpExecError("Could not start instance: %s" % msg)
2952
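
# Illustrative sketch (not part of the original cmdlib.py): CheckPrereq above
# builds the effective hypervisor/backend parameters by layering dictionaries
# -- cluster defaults, then the instance's own values, then the per-startup
# overrides -- before syntax-checking the result.  objects.FillDict behaves
# like the minimal `_sketch_fill` below; the parameter keys in the comment are
# purely illustrative.
def _sketch_fill(defaults, *overrides):
  result = dict(defaults)
  for layer in overrides:
    result.update(layer)
  return result

# cluster_hv  = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
# instance_hv = {"root_path": "/dev/xvda1"}
# start_hv    = {"kernel_path": "/boot/vmlinuz-test"}
# _sketch_fill(cluster_hv, instance_hv, start_hv)
#   -> {'kernel_path': '/boot/vmlinuz-test', 'root_path': '/dev/xvda1'}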

    
2953

    
2954
class LURebootInstance(LogicalUnit):
2955
  """Reboot an instance.
2956

2957
  """
2958
  HPATH = "instance-reboot"
2959
  HTYPE = constants.HTYPE_INSTANCE
2960
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2961
  REQ_BGL = False
2962

    
2963
  def ExpandNames(self):
2964
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2965
                                   constants.INSTANCE_REBOOT_HARD,
2966
                                   constants.INSTANCE_REBOOT_FULL]:
2967
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2968
                                  (constants.INSTANCE_REBOOT_SOFT,
2969
                                   constants.INSTANCE_REBOOT_HARD,
2970
                                   constants.INSTANCE_REBOOT_FULL))
2971
    self._ExpandAndLockInstance()
2972

    
2973
  def BuildHooksEnv(self):
2974
    """Build hooks env.
2975

2976
    This runs on master, primary and secondary nodes of the instance.
2977

2978
    """
2979
    env = {
2980
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2981
      "REBOOT_TYPE": self.op.reboot_type,
2982
      }
2983
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2984
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2985
    return env, nl, nl
2986

    
2987
  def CheckPrereq(self):
2988
    """Check prerequisites.
2989

2990
    This checks that the instance is in the cluster.
2991

2992
    """
2993
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2994
    assert self.instance is not None, \
2995
      "Cannot retrieve locked instance %s" % self.op.instance_name
2996

    
2997
    _CheckNodeOnline(self, instance.primary_node)
2998

    
2999
    # check bridges existence
3000
    _CheckInstanceBridgesExist(self, instance)
3001

    
3002
  def Exec(self, feedback_fn):
3003
    """Reboot the instance.
3004

3005
    """
3006
    instance = self.instance
3007
    ignore_secondaries = self.op.ignore_secondaries
3008
    reboot_type = self.op.reboot_type
3009

    
3010
    node_current = instance.primary_node
3011

    
3012
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3013
                       constants.INSTANCE_REBOOT_HARD]:
3014
      for disk in instance.disks:
3015
        self.cfg.SetDiskID(disk, node_current)
3016
      result = self.rpc.call_instance_reboot(node_current, instance,
3017
                                             reboot_type)
3018
      msg = result.RemoteFailMsg()
3019
      if msg:
3020
        raise errors.OpExecError("Could not reboot instance: %s" % msg)
3021
    else:
3022
      result = self.rpc.call_instance_shutdown(node_current, instance)
3023
      msg = result.RemoteFailMsg()
3024
      if msg:
3025
        raise errors.OpExecError("Could not shutdown instance for"
3026
                                 " full reboot: %s" % msg)
3027
      _ShutdownInstanceDisks(self, instance)
3028
      _StartInstanceDisks(self, instance, ignore_secondaries)
3029
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3030
      msg = result.RemoteFailMsg()
3031
      if msg:
3032
        _ShutdownInstanceDisks(self, instance)
3033
        raise errors.OpExecError("Could not start instance for"
3034
                                 " full reboot: %s" % msg)
3035

    
3036
    self.cfg.MarkInstanceUp(instance.name)
3037
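
# Illustrative sketch (not part of the original cmdlib.py): the Exec method
# above dispatches on the reboot type -- soft and hard reboots are handed to
# the hypervisor in a single RPC, while a "full" reboot is emulated as a
# shutdown, a disk cycle and a fresh start.  The constants and step names
# below are stand-ins for illustration only.
_SKETCH_REBOOT_SOFT, _SKETCH_REBOOT_HARD, _SKETCH_REBOOT_FULL = \
  "soft", "hard", "full"

def _sketch_reboot_plan(reboot_type):
  if reboot_type in (_SKETCH_REBOOT_SOFT, _SKETCH_REBOOT_HARD):
    return ["call_instance_reboot"]
  elif reboot_type == _SKETCH_REBOOT_FULL:
    return ["call_instance_shutdown", "shutdown_disks",
            "start_disks", "call_instance_start"]
  raise ValueError("reboot type not in [%s, %s, %s]" %
                   (_SKETCH_REBOOT_SOFT, _SKETCH_REBOOT_HARD,
                    _SKETCH_REBOOT_FULL))

# _sketch_reboot_plan("full") ->
#   ['call_instance_shutdown', 'shutdown_disks', 'start_disks',
#    'call_instance_start']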

    
3038

    
3039
class LUShutdownInstance(LogicalUnit):
3040
  """Shutdown an instance.
3041

3042
  """
3043
  HPATH = "instance-stop"
3044
  HTYPE = constants.HTYPE_INSTANCE
3045
  _OP_REQP = ["instance_name"]
3046
  REQ_BGL = False
3047

    
3048
  def ExpandNames(self):
3049
    self._ExpandAndLockInstance()
3050

    
3051
  def BuildHooksEnv(self):
3052
    """Build hooks env.
3053

3054
    This runs on master, primary and secondary nodes of the instance.
3055

3056
    """
3057
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3058
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3059
    return env, nl, nl
3060

    
3061
  def CheckPrereq(self):
3062
    """Check prerequisites.
3063

3064
    This checks that the instance is in the cluster.
3065

3066
    """
3067
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3068
    assert self.instance is not None, \
3069
      "Cannot retrieve locked instance %s" % self.op.instance_name
3070
    _CheckNodeOnline(self, self.instance.primary_node)
3071

    
3072
  def Exec(self, feedback_fn):
3073
    """Shutdown the instance.
3074

3075
    """
3076
    instance = self.instance
3077
    node_current = instance.primary_node
3078
    self.cfg.MarkInstanceDown(instance.name)
3079
    result = self.rpc.call_instance_shutdown(node_current, instance)
3080
    msg = result.RemoteFailMsg()
3081
    if msg:
3082
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3083

    
3084
    _ShutdownInstanceDisks(self, instance)
3085

    
3086

    
3087
class LUReinstallInstance(LogicalUnit):
3088
  """Reinstall an instance.
3089

3090
  """
3091
  HPATH = "instance-reinstall"
3092
  HTYPE = constants.HTYPE_INSTANCE
3093
  _OP_REQP = ["instance_name"]
3094
  REQ_BGL = False
3095

    
3096
  def ExpandNames(self):
3097
    self._ExpandAndLockInstance()
3098

    
3099
  def BuildHooksEnv(self):
3100
    """Build hooks env.
3101

3102
    This runs on master, primary and secondary nodes of the instance.
3103

3104
    """
3105
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3106
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3107
    return env, nl, nl
3108

    
3109
  def CheckPrereq(self):
3110
    """Check prerequisites.
3111

3112
    This checks that the instance is in the cluster and is not running.
3113

3114
    """
3115
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3116
    assert instance is not None, \
3117
      "Cannot retrieve locked instance %s" % self.op.instance_name
3118
    _CheckNodeOnline(self, instance.primary_node)
3119

    
3120
    if instance.disk_template == constants.DT_DISKLESS:
3121
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3122
                                 self.op.instance_name)
3123
    if instance.admin_up:
3124
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3125
                                 self.op.instance_name)
3126
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3127
                                              instance.name,
3128
                                              instance.hypervisor)
3129
    msg = remote_info.RemoteFailMsg()
3130
    if msg:
3131
      raise errors.OpPrereqError("Error checking node %s: %s" %
3132
                                 (instance.primary_node, msg))
3133
    if remote_info.payload:
3134
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3135
                                 (self.op.instance_name,
3136
                                  instance.primary_node))
3137

    
3138
    self.op.os_type = getattr(self.op, "os_type", None)
3139
    if self.op.os_type is not None:
3140
      # OS verification
3141
      pnode = self.cfg.GetNodeInfo(
3142
        self.cfg.ExpandNodeName(instance.primary_node))
3143
      if pnode is None:
3144
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3145
                                   self.op.pnode)
3146
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3147
      result.Raise()
3148
      if not isinstance(result.data, objects.OS):
3149
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
3150
                                   " primary node"  % self.op.os_type)
3151

    
3152
    self.instance = instance
3153

    
3154
  def Exec(self, feedback_fn):
3155
    """Reinstall the instance.
3156

3157
    """
3158
    inst = self.instance
3159

    
3160
    if self.op.os_type is not None:
3161
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3162
      inst.os = self.op.os_type
3163
      self.cfg.Update(inst)
3164

    
3165
    _StartInstanceDisks(self, inst, None)
3166
    try:
3167
      feedback_fn("Running the instance OS create scripts...")
3168
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3169
      msg = result.RemoteFailMsg()
3170
      if msg:
3171
        raise errors.OpExecError("Could not install OS for instance %s"
3172
                                 " on node %s: %s" %
3173
                                 (inst.name, inst.primary_node, msg))
3174
    finally:
3175
      _ShutdownInstanceDisks(self, inst)
3176
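
# Illustrative sketch (not part of the original cmdlib.py): the Exec method
# above wraps the OS create scripts in try/finally so the instance's disks
# are always deactivated again, whether the reinstall succeeded or not.  A
# generic rendition of that pattern, with hypothetical callables:
def _sketch_with_disks(activate, deactivate, action):
  activate()
  try:
    return action()
  finally:
    deactivate()

# e.g. _sketch_with_disks(start_disks, shutdown_disks, run_os_create) ensures
# shutdown_disks() runs even if run_os_create() raises.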

    
3177

    
3178
class LURenameInstance(LogicalUnit):
3179
  """Rename an instance.
3180

3181
  """
3182
  HPATH = "instance-rename"
3183
  HTYPE = constants.HTYPE_INSTANCE
3184
  _OP_REQP = ["instance_name", "new_name"]
3185

    
3186
  def BuildHooksEnv(self):
3187
    """Build hooks env.
3188

3189
    This runs on master, primary and secondary nodes of the instance.
3190

3191
    """
3192
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3193
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3194
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3195
    return env, nl, nl
3196

    
3197
  def CheckPrereq(self):
3198
    """Check prerequisites.
3199

3200
    This checks that the instance is in the cluster and is not running.
3201

3202
    """
3203
    instance = self.cfg.GetInstanceInfo(
3204
      self.cfg.ExpandInstanceName(self.op.instance_name))
3205
    if instance is None:
3206
      raise errors.OpPrereqError("Instance '%s' not known" %
3207
                                 self.op.instance_name)
3208
    _CheckNodeOnline(self, instance.primary_node)
3209

    
3210
    if instance.admin_up:
3211
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3212
                                 self.op.instance_name)
3213
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3214
                                              instance.name,
3215
                                              instance.hypervisor)
3216
    msg = remote_info.RemoteFailMsg()
3217
    if msg:
3218
      raise errors.OpPrereqError("Error checking node %s: %s" %
3219
                                 (instance.primary_node, msg))
3220
    if remote_info.payload:
3221
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3222
                                 (self.op.instance_name,
3223
                                  instance.primary_node))
3224
    self.instance = instance
3225

    
3226
    # new name verification
3227
    name_info = utils.HostInfo(self.op.new_name)
3228

    
3229
    self.op.new_name = new_name = name_info.name
3230
    instance_list = self.cfg.GetInstanceList()
3231
    if new_name in instance_list:
3232
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3233
                                 new_name)
3234

    
3235
    if not getattr(self.op, "ignore_ip", False):
3236
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3237
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3238
                                   (name_info.ip, new_name))
3239

    
3240

    
3241
  def Exec(self, feedback_fn):
3242
    """Reinstall the instance.
3243

3244
    """
3245
    inst = self.instance
3246
    old_name = inst.name
3247

    
3248
    if inst.disk_template == constants.DT_FILE:
3249
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3250

    
3251
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3252
    # Change the instance lock. This is definitely safe while we hold the BGL
3253
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3254
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3255

    
3256
    # re-read the instance from the configuration after rename
3257
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3258

    
3259
    if inst.disk_template == constants.DT_FILE:
3260
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3261
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3262
                                                     old_file_storage_dir,
3263
                                                     new_file_storage_dir)
3264
      result.Raise()
3265
      if not result.data:
3266
        raise errors.OpExecError("Could not connect to node '%s' to rename"
3267
                                 " directory '%s' to '%s' (but the instance"
3268
                                 " has been renamed in Ganeti)" % (
3269
                                 inst.primary_node, old_file_storage_dir,
3270
                                 new_file_storage_dir))
3271

    
3272
      if not result.data[0]:
3273
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
3274
                                 " (but the instance has been renamed in"
3275
                                 " Ganeti)" % (old_file_storage_dir,
3276
                                               new_file_storage_dir))
3277

    
3278
    _StartInstanceDisks(self, inst, None)
3279
    try:
3280
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3281
                                                 old_name)
3282
      msg = result.RemoteFailMsg()
3283
      if msg:
3284
        msg = ("Could not run OS rename script for instance %s on node %s"
3285
               " (but the instance has been renamed in Ganeti): %s" %
3286
               (inst.name, inst.primary_node, msg))
3287
        self.proc.LogWarning(msg)
3288
    finally:
3289
      _ShutdownInstanceDisks(self, inst)
3290
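
# Illustrative sketch (not part of the original cmdlib.py): CheckPrereq above
# validates the new name in three steps -- resolve it, reject it if another
# instance already carries it, and (unless ignore_ip is set) reject it if its
# IP already answers a TCP probe.  utils.HostInfo and utils.TcpPing are
# Ganeti's own helpers; the stand-alone version below approximates them with
# the socket module purely for illustration, and probe_port stands in for
# whatever constants.DEFAULT_NODED_PORT resolves to.
import socket

def _sketch_check_new_name(new_name, existing_instances, probe_port,
                           ignore_ip=False, timeout=2.0):
  if new_name in existing_instances:
    raise ValueError("Instance '%s' is already in the cluster" % new_name)
  if not ignore_ip:
    # the name must resolve, as with utils.HostInfo; failures propagate
    ip = socket.gethostbyname(new_name)
    try:
      sock = socket.create_connection((ip, probe_port), timeout)
      sock.close()
      raise ValueError("IP %s of instance %s already in use" % (ip, new_name))
    except (socket.error, socket.timeout):
      pass  # nothing answered on probe_port: the address looks free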

    
3291

    
3292
class LURemoveInstance(LogicalUnit):
3293
  """Remove an instance.
3294

3295
  """
3296
  HPATH = "instance-remove"
3297
  HTYPE = constants.HTYPE_INSTANCE
3298
  _OP_REQP = ["instance_name", "ignore_failures"]
3299
  REQ_BGL = False
3300

    
3301
  def ExpandNames(self):
3302
    self._ExpandAndLockInstance()
3303
    self.needed_locks[locking.LEVEL_NODE] = []
3304
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3305

    
3306
  def DeclareLocks(self, level):
3307
    if level == locking.LEVEL_NODE:
3308
      self._LockInstancesNodes()
3309

    
3310
  def BuildHooksEnv(self):
3311
    """Build hooks env.
3312

3313
    This runs on master, primary and secondary nodes of the instance.
3314

3315
    """
3316
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3317
    nl = [self.cfg.GetMasterNode()]
3318
    return env, nl, nl
3319

    
3320
  def CheckPrereq(self):
3321
    """Check prerequisites.
3322

3323
    This checks that the instance is in the cluster.
3324

3325
    """
3326
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3327
    assert self.instance is not None, \
3328
      "Cannot retrieve locked instance %s" % self.op.instance_name
3329

    
3330
  def Exec(self, feedback_fn):
3331
    """Remove the instance.
3332

3333
    """
3334
    instance = self.instance
3335
    logging.info("Shutting down instance %s on node %s",
3336
                 instance.name, instance.primary_node)
3337

    
3338
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3339
    msg = result.RemoteFailMsg()
3340
    if msg:
3341
      if self.op.ignore_failures:
3342
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3343
      else:
3344
        raise errors.OpExecError("Could not shutdown instance %s on"
3345
                                 " node %s: %s" %
3346
                                 (instance.name, instance.primary_node, msg))
3347

    
3348
    logging.info("Removing block devices for instance %s", instance.name)
3349

    
3350
    if not _RemoveDisks(self, instance):
3351
      if self.op.ignore_failures:
3352
        feedback_fn("Warning: can't remove instance's disks")
3353
      else:
3354
        raise errors.OpExecError("Can't remove instance's disks")
3355

    
3356
    logging.info("Removing instance %s out of cluster config", instance.name)
3357

    
3358
    self.cfg.RemoveInstance(instance.name)
3359
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3360

    
3361

    
3362
class LUQueryInstances(NoHooksLU):
3363
  """Logical unit for querying instances.
3364

3365
  """
3366
  _OP_REQP = ["output_fields", "names", "use_locking"]
3367
  REQ_BGL = False
3368
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3369
                                    "admin_state",
3370
                                    "disk_template", "ip", "mac", "bridge",
3371
                                    "sda_size", "sdb_size", "vcpus", "tags",
3372
                                    "network_port", "beparams",
3373
                                    r"(disk)\.(size)/([0-9]+)",
3374
                                    r"(disk)\.(sizes)", "disk_usage",
3375
                                    r"(nic)\.(mac|ip|bridge)/([0-9]+)",
3376
                                    r"(nic)\.(macs|ips|bridges)",
3377
                                    r"(disk|nic)\.(count)",
3378
                                    "serial_no", "hypervisor", "hvparams",] +
3379
                                  ["hv/%s" % name
3380
                                   for name in constants.HVS_PARAMETERS] +
3381
                                  ["be/%s" % name
3382
                                   for name in constants.BES_PARAMETERS])
3383
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3384

    
3385

    
3386
  def ExpandNames(self):
3387
    _CheckOutputFields(static=self._FIELDS_STATIC,
3388
                       dynamic=self._FIELDS_DYNAMIC,
3389
                       selected=self.op.output_fields)
3390

    
3391
    self.needed_locks = {}
3392
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3393
    self.share_locks[locking.LEVEL_NODE] = 1
3394

    
3395
    if self.op.names:
3396
      self.wanted = _GetWantedInstances(self, self.op.names)
3397
    else:
3398
      self.wanted = locking.ALL_SET
3399

    
3400
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3401
    self.do_locking = self.do_node_query and self.op.use_locking
3402
    if self.do_locking:
3403
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3404
      self.needed_locks[locking.LEVEL_NODE] = []
3405
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3406

    
3407
  def DeclareLocks(self, level):
3408
    if level == locking.LEVEL_NODE and self.do_locking:
3409
      self._LockInstancesNodes()
3410

    
3411
  def CheckPrereq(self):
3412
    """Check prerequisites.
3413

3414
    """
3415
    pass
3416

    
3417
  def Exec(self, feedback_fn):
3418
    """Computes the list of nodes and their attributes.
3419

3420
    """
3421
    all_info = self.cfg.GetAllInstancesInfo()
3422
    if self.wanted == locking.ALL_SET:
3423
      # caller didn't specify instance names, so ordering is not important
3424
      if self.do_locking:
3425
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3426
      else:
3427
        instance_names = all_info.keys()
3428
      instance_names = utils.NiceSort(instance_names)
3429
    else:
3430
      # caller did specify names, so we must keep the ordering
3431
      if self.do_locking:
3432
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3433
      else:
3434
        tgt_set = all_info.keys()
3435
      missing = set(self.wanted).difference(tgt_set)
3436
      if missing:
3437
        raise errors.OpExecError("Some instances were removed before"
3438
                                 " retrieving their data: %s" % missing)
3439
      instance_names = self.wanted
3440

    
3441
    instance_list = [all_info[iname] for iname in instance_names]
3442

    
3443
    # begin data gathering
3444

    
3445
    nodes = frozenset([inst.primary_node for inst in instance_list])
3446
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3447

    
3448
    bad_nodes = []
3449
    off_nodes = []
3450
    if self.do_node_query:
3451
      live_data = {}
3452
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3453
      for name in nodes:
3454
        result = node_data[name]
3455
        if result.offline:
3456
          # offline nodes will be in both lists
3457
          off_nodes.append(name)
3458
        if result.failed or result.RemoteFailMsg():
3459
          bad_nodes.append(name)
3460
        else:
3461
          if result.payload:
3462
            live_data.update(result.payload)
3463
          # else no instance is alive
3464
    else:
3465
      live_data = dict([(name, {}) for name in instance_names])
3466

    
3467
    # end data gathering
3468

    
3469
    HVPREFIX = "hv/"
3470
    BEPREFIX = "be/"
3471
    output = []
3472
    for instance in instance_list:
3473
      iout = []
3474
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
3475
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
3476
      for field in self.op.output_fields:
3477
        st_match = self._FIELDS_STATIC.Matches(field)
3478
        if field == "name":
3479
          val = instance.name
3480
        elif field == "os":
3481
          val = instance.os
3482
        elif field == "pnode":
3483
          val = instance.primary_node
3484
        elif field == "snodes":
3485
          val = list(instance.secondary_nodes)
3486
        elif field == "admin_state":
3487
          val = instance.admin_up
3488
        elif field == "oper_state":
3489
          if instance.primary_node in bad_nodes:
3490
            val = None
3491
          else:
3492
            val = bool(live_data.get(instance.name))
3493
        elif field == "status":
3494
          if instance.primary_node in off_nodes:
3495
            val = "ERROR_nodeoffline"
3496
          elif instance.primary_node in bad_nodes:
3497
            val = "ERROR_nodedown"
3498
          else:
3499
            running = bool(live_data.get(instance.name))
3500
            if running:
3501
              if instance.admin_up:
3502
                val = "running"
3503
              else:
3504
                val = "ERROR_up"
3505
            else:
3506
              if instance.admin_up:
3507
                val = "ERROR_down"
3508
              else:
3509
                val = "ADMIN_down"
3510
        elif field == "oper_ram":
3511
          if instance.primary_node in bad_nodes:
3512
            val = None
3513
          elif instance.name in live_data:
3514
            val = live_data[instance.name].get("memory", "?")
3515
          else:
3516
            val = "-"
3517
        elif field == "disk_template":
3518
          val = instance.disk_template
3519
        elif field == "ip":
3520
          val = instance.nics[0].ip
3521
        elif field == "bridge":
3522
          val = instance.nics[0].bridge
3523
        elif field == "mac":
3524
          val = instance.nics[0].mac
3525
        elif field == "sda_size" or field == "sdb_size":
3526
          idx = ord(field[2]) - ord('a')
3527
          try:
3528
            val = instance.FindDisk(idx).size
3529
          except errors.OpPrereqError:
3530
            val = None
3531
        elif field == "disk_usage": # total disk usage per node
3532
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
3533
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
3534
        elif field == "tags":
3535
          val = list(instance.GetTags())
3536
        elif field == "serial_no":
3537
          val = instance.serial_no
3538
        elif field == "network_port":
3539
          val = instance.network_port
3540
        elif field == "hypervisor":
3541
          val = instance.hypervisor
3542
        elif field == "hvparams":
3543
          val = i_hv
3544
        elif (field.startswith(HVPREFIX) and
3545
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
3546
          val = i_hv.get(field[len(HVPREFIX):], None)
3547
        elif field == "beparams":
3548
          val = i_be
3549
        elif (field.startswith(BEPREFIX) and
3550
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
3551
          val = i_be.get(field[len(BEPREFIX):], None)
3552
        elif st_match and st_match.groups():
3553
          # matches a variable list
3554
          st_groups = st_match.groups()
3555
          if st_groups and st_groups[0] == "disk":
3556
            if st_groups[1] == "count":
3557
              val = len(instance.disks)
3558
            elif st_groups[1] == "sizes":
3559
              val = [disk.size for disk in instance.disks]
3560
            elif st_groups[1] == "size":
3561
              try:
3562
                val = instance.FindDisk(st_groups[2]).size
3563
              except errors.OpPrereqError:
3564
                val = None
3565
            else:
3566
              assert False, "Unhandled disk parameter"
3567
          elif st_groups[0] == "nic":
3568
            if st_groups[1] == "count":
3569
              val = len(instance.nics)
3570
            elif st_groups[1] == "macs":
3571
              val = [nic.mac for nic in instance.nics]
3572
            elif st_groups[1] == "ips":
3573
              val = [nic.ip for nic in instance.nics]
3574
            elif st_groups[1] == "bridges":
3575
              val = [nic.bridge for nic in instance.nics]
3576
            else:
3577
              # index-based item
3578
              nic_idx = int(st_groups[2])
3579
              if nic_idx >= len(instance.nics):
3580
                val = None
3581
              else:
3582
                if st_groups[1] == "mac":
3583
                  val = instance.nics[nic_idx].mac
3584
                elif st_groups[1] == "ip":
3585
                  val = instance.nics[nic_idx].ip
3586
                elif st_groups[1] == "bridge":
3587
                  val = instance.nics[nic_idx].bridge
3588
                else:
3589
                  assert False, "Unhandled NIC parameter"
3590
          else:
3591
            assert False, "Unhandled variable parameter"
3592
        else:
3593
          raise errors.ParameterError(field)
3594
        iout.append(val)
3595
      output.append(iout)
3596

    
3597
    return output
3598
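
# Illustrative sketch (not part of the original cmdlib.py): the parameterized
# output fields of LUQueryInstances ("disk.size/0", "nic.mac/1", ...) are
# declared above as grouped regular expressions, and Exec dispatches on the
# captured groups.  A minimal stand-alone version of that matching (re is
# already imported at the top of this module):
_SKETCH_FIELD_RE = re.compile(r"^(disk|nic)\.(size|mac|ip|bridge)/([0-9]+)$")

def _sketch_parse_field(field):
  m = _SKETCH_FIELD_RE.match(field)
  if not m:
    return None
  kind, attr, idx = m.groups()
  return kind, attr, int(idx)

# _sketch_parse_field("nic.mac/1")   -> ('nic', 'mac', 1)
# _sketch_parse_field("disk.size/0") -> ('disk', 'size', 0)
# _sketch_parse_field("name")        -> None (handled as a plain field)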

    
3599

    
3600
class LUFailoverInstance(LogicalUnit):
3601
  """Failover an instance.
3602

3603
  """
3604
  HPATH = "instance-failover"
3605
  HTYPE = constants.HTYPE_INSTANCE
3606
  _OP_REQP = ["instance_name", "ignore_consistency"]
3607
  REQ_BGL = False
3608

    
3609
  def ExpandNames(self):
3610
    self._ExpandAndLockInstance()
3611
    self.needed_locks[locking.LEVEL_NODE] = []
3612
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3613

    
3614
  def DeclareLocks(self, level):
3615
    if level == locking.LEVEL_NODE:
3616
      self._LockInstancesNodes()
3617

    
3618
  def BuildHooksEnv(self):
3619
    """Build hooks env.
3620

3621
    This runs on master, primary and secondary nodes of the instance.
3622

3623
    """
3624
    env = {
3625
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
3626
      }
3627
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3628
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3629
    return env, nl, nl
3630

    
3631
  def CheckPrereq(self):
3632
    """Check prerequisites.
3633

3634
    This checks that the instance is in the cluster.
3635

3636
    """
3637
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3638
    assert self.instance is not None, \
3639
      "Cannot retrieve locked instance %s" % self.op.instance_name
3640

    
3641
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3642
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3643
      raise errors.OpPrereqError("Instance's disk layout is not"
3644
                                 " network mirrored, cannot failover.")
3645

    
3646
    secondary_nodes = instance.secondary_nodes
3647
    if not secondary_nodes:
3648
      raise errors.ProgrammerError("no secondary node but using "
3649
                                   "a mirrored disk template")
3650

    
3651
    target_node = secondary_nodes[0]
3652
    _CheckNodeOnline(self, target_node)
3653
    _CheckNodeNotDrained(self, target_node)
3654
    # check memory requirements on the secondary node
3655
    _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
3656
                         instance.name, bep[constants.BE_MEMORY],
3657
                         instance.hypervisor)
3658
    # check bridge existence
3659
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3660

    
3661
  def Exec(self, feedback_fn):
3662
    """Failover an instance.
3663

3664
    The failover is done by shutting it down on its present node and
3665
    starting it on the secondary.
3666

3667
    """
3668
    instance = self.instance
3669

    
3670
    source_node = instance.primary_node
3671
    target_node = instance.secondary_nodes[0]
3672

    
3673
    feedback_fn("* checking disk consistency between source and target")
3674
    for dev in instance.disks:
3675
      # for drbd, these are drbd over lvm
3676
      if not _CheckDiskConsistency(self, dev, target_node, False):
3677
        if instance.admin_up and not self.op.ignore_consistency:
3678
          raise errors.OpExecError("Disk %s is degraded on target node,"
3679
                                   " aborting failover." % dev.iv_name)
3680

    
3681
    feedback_fn("* shutting down instance on source node")
3682
    logging.info("Shutting down instance %s on node %s",
3683
                 instance.name, source_node)
3684

    
3685
    result = self.rpc.call_instance_shutdown(source_node, instance)
3686
    msg = result.RemoteFailMsg()
3687
    if msg:
3688
      if self.op.ignore_consistency:
3689
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
3690
                             " Proceeding anyway. Please make sure node"
3691
                             " %s is down. Error details: %s",
3692
                             instance.name, source_node, source_node, msg)
3693
      else:
3694
        raise errors.OpExecError("Could not shutdown instance %s on"
3695
                                 " node %s: %s" %
3696
                                 (instance.name, source_node, msg))
3697

    
3698
    feedback_fn("* deactivating the instance's disks on source node")
3699
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
3700
      raise errors.OpExecError("Can't shut down the instance's disks.")
3701

    
3702
    instance.primary_node = target_node
3703
    # distribute new instance config to the other nodes
3704
    self.cfg.Update(instance)
3705

    
3706
    # Only start the instance if it's marked as up
3707
    if instance.admin_up:
3708
      feedback_fn("* activating the instance's disks on target node")
3709
      logging.info("Starting instance %s on node %s",
3710
                   instance.name, target_node)
3711

    
3712
      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
3713
                                               ignore_secondaries=True)
3714
      if not disks_ok:
3715
        _ShutdownInstanceDisks(self, instance)
3716
        raise errors.OpExecError("Can't activate the instance's disks")
3717

    
3718
      feedback_fn("* starting the instance on the target node")
3719
      result = self.rpc.call_instance_start(target_node, instance, None, None)
3720
      msg = result.RemoteFailMsg()
3721
      if msg:
3722
        _ShutdownInstanceDisks(self, instance)
3723
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
3724
                                 (instance.name, target_node, msg))
3725
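
# Illustrative sketch (not part of the original cmdlib.py): before failing
# over, Exec above checks every disk on the target node and only aborts when
# the instance is marked up *and* the operator did not ask to ignore
# consistency.  The guard, reduced to a stand-alone predicate:
def _sketch_must_abort_failover(disk_ok, admin_up, ignore_consistency):
  return (not disk_ok) and admin_up and not ignore_consistency

# _sketch_must_abort_failover(False, True, False)  -> True  (abort)
# _sketch_must_abort_failover(False, True, True)   -> False (forced failover)
# _sketch_must_abort_failover(False, False, False) -> False (instance is down)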

    
3726

    
3727
class LUMigrateInstance(LogicalUnit):
3728
  """Migrate an instance.
3729

3730
  This is migration without shutting down, compared to the failover,
3731
  which is done with shutdown.
3732

3733
  """
3734
  HPATH = "instance-migrate"
3735
  HTYPE = constants.HTYPE_INSTANCE
3736
  _OP_REQP = ["instance_name", "live", "cleanup"]
3737

    
3738
  REQ_BGL = False
3739

    
3740
  def ExpandNames(self):
3741
    self._ExpandAndLockInstance()
3742
    self.needed_locks[locking.LEVEL_NODE] = []
3743
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3744

    
3745
  def DeclareLocks(self, level):
3746
    if level == locking.LEVEL_NODE:
3747
      self._LockInstancesNodes()
3748

    
3749
  def BuildHooksEnv(self):
3750
    """Build hooks env.
3751

3752
    This runs on master, primary and secondary nodes of the instance.
3753

3754
    """
3755
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3756
    env["MIGRATE_LIVE"] = self.op.live
3757
    env["MIGRATE_CLEANUP"] = self.op.cleanup
3758
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
3759
    return env, nl, nl
3760

    
3761
  def CheckPrereq(self):
3762
    """Check prerequisites.
3763

3764
    This checks that the instance is in the cluster.
3765

3766
    """
3767
    instance = self.cfg.GetInstanceInfo(
3768
      self.cfg.ExpandInstanceName(self.op.instance_name))
3769
    if instance is None:
3770
      raise errors.OpPrereqError("Instance '%s' not known" %
3771
                                 self.op.instance_name)
3772

    
3773
    if instance.disk_template != constants.DT_DRBD8:
3774
      raise errors.OpPrereqError("Instance's disk layout is not"
3775
                                 " drbd8, cannot migrate.")
3776

    
3777
    secondary_nodes = instance.secondary_nodes
3778
    if not secondary_nodes:
3779
      raise errors.ConfigurationError("No secondary node but using"
3780
                                      " drbd8 disk template")
3781

    
3782
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
3783

    
3784
    target_node = secondary_nodes[0]
3785
    # check memory requirements on the secondary node
3786
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
3787
                         instance.name, i_be[constants.BE_MEMORY],
3788
                         instance.hypervisor)
3789

    
3790
    # check bridge existence
3791
    _CheckInstanceBridgesExist(self, instance, node=target_node)
3792

    
3793
    if not self.op.cleanup:
3794
      _CheckNodeNotDrained(self, target_node)
3795
      result = self.rpc.call_instance_migratable(instance.primary_node,
3796
                                                 instance)
3797
      msg = result.RemoteFailMsg()
3798
      if msg:
3799
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
3800
                                   msg)
3801

    
3802
    self.instance = instance
3803

    
3804
  def _WaitUntilSync(self):
3805
    """Poll with custom rpc for disk sync.
3806

3807
    This uses our own step-based rpc call.
3808

3809
    """
3810
    self.feedback_fn("* wait until resync is done")
3811
    all_done = False
3812
    while not all_done:
3813
      all_done = True
3814
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
3815
                                            self.nodes_ip,
3816
                                            self.instance.disks)
3817
      min_percent = 100
3818
      for node, nres in result.items():
3819
        msg = nres.RemoteFailMsg()
3820
        if msg:
3821
          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
3822
                                   (node, msg))
3823
        node_done, node_percent = nres.payload
3824
        all_done = all_done and node_done
3825
        if node_percent is not None:
3826
          min_percent = min(min_percent, node_percent)
3827
      if not all_done:
3828
        if min_percent < 100:
3829
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
3830
        time.sleep(2)
3831

    
3832
  def _EnsureSecondary(self, node):
3833
    """Demote a node to secondary.
3834

3835
    """
3836
    self.feedback_fn("* switching node %s to secondary mode" % node)
3837

    
3838
    for dev in self.instance.disks:
3839
      self.cfg.SetDiskID(dev, node)
3840

    
3841
    result = self.rpc.call_blockdev_close(node, self.instance.name,
3842
                                          self.instance.disks)
3843
    msg = result.RemoteFailMsg()
3844
    if msg:
3845
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
3846
                               " error %s" % (node, msg))
3847

    
3848
  def _GoStandalone(self):
3849
    """Disconnect from the network.
3850

3851
    """
3852
    self.feedback_fn("* changing into standalone mode")
3853
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
3854
                                               self.instance.disks)
3855
    for node, nres in result.items():
3856
      msg = nres.RemoteFailMsg()
3857
      if msg:
3858
        raise errors.OpExecError("Cannot disconnect disks node %s,"
3859
                                 " error %s" % (node, msg))
3860

    
3861
  def _GoReconnect(self, multimaster):
3862
    """Reconnect to the network.
3863

3864
    """
3865
    if multimaster:
3866
      msg = "dual-master"
3867
    else:
3868
      msg = "single-master"
3869
    self.feedback_fn("* changing disks into %s mode" % msg)
3870
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
3871
                                           self.instance.disks,
3872
                                           self.instance.name, multimaster)
3873
    for node, nres in result.items():
3874
      msg = nres.RemoteFailMsg()
3875
      if msg:
3876
        raise errors.OpExecError("Cannot change disks config on node %s,"
3877
                                 " error: %s" % (node, msg))
3878

    
3879
  def _ExecCleanup(self):
3880
    """Try to cleanup after a failed migration.
3881

3882
    The cleanup is done by:
3883
      - check that the instance is running only on one node
3884
        (and update the config if needed)
3885
      - change disks on its secondary node to secondary
3886
      - wait until disks are fully synchronized
3887
      - disconnect from the network
3888
      - change disks into single-master mode
3889
      - wait again until disks are fully synchronized
3890

3891
    """
3892
    instance = self.instance
3893
    target_node = self.target_node
3894
    source_node = self.source_node
3895

    
3896
    # check running on only one node
3897
    self.feedback_fn("* checking where the instance actually runs"
3898
                     " (if this hangs, the hypervisor might be in"
3899
                     " a bad state)")
3900
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
3901
    for node, result in ins_l.items():
3902
      msg = result.RemoteFailMsg()
3903
      if msg:
3904
        raise errors.OpExecError("Can't contact node %s: %s" % (node, msg))
3905

    
3906
    runningon_source = instance.name in ins_l[source_node].payload
3907
    runningon_target = instance.name in ins_l[target_node].payload
3908

    
3909
    if runningon_source and runningon_target:
3910
      raise errors.OpExecError("Instance seems to be running on two nodes,"
3911
                               " or the hypervisor is confused. You will have"
3912
                               " to ensure manually that it runs only on one"
3913
                               " and restart this operation.")
3914

    
3915
    if not (runningon_source or runningon_target):
3916
      raise errors.OpExecError("Instance does not seem to be running at all."
3917
                               " In this case, it's safer to repair by"
3918
                               " running 'gnt-instance stop' to ensure disk"
3919
                               " shutdown, and then restarting it.")
3920

    
3921
    if runningon_target:
3922
      # the migration has actually succeeded, we need to update the config
3923
      self.feedback_fn("* instance running on secondary node (%s),"
3924
                       " updating config" % target_node)
3925
      instance.primary_node = target_node
3926
      self.cfg.Update(instance)
3927
      demoted_node = source_node
3928
    else:
3929
      self.feedback_fn("* instance confirmed to be running on its"
3930
                       " primary node (%s)" % source_node)
3931
      demoted_node = target_node
3932

    
3933
    self._EnsureSecondary(demoted_node)
3934
    try:
3935
      self._WaitUntilSync()
3936
    except errors.OpExecError:
3937
      # we ignore here errors, since if the device is standalone, it
3938
      # won't be able to sync
3939
      pass
3940
    self._GoStandalone()
3941
    self._GoReconnect(False)
3942
    self._WaitUntilSync()
3943

    
3944
    self.feedback_fn("* done")
3945

    
3946
  def _RevertDiskStatus(self):
3947
    """Try to revert the disk status after a failed migration.
3948

3949
    """
3950
    target_node = self.target_node
3951
    try:
3952
      self._EnsureSecondary(target_node)
3953
      self._GoStandalone()
3954
      self._GoReconnect(False)
3955
      self._WaitUntilSync()
3956
    except errors.OpExecError, err:
3957
      self.LogWarning("Migration failed and I can't reconnect the"
3958
                      " drives: error '%s'\n"
3959
                      "Please look and recover the instance status" %
3960
                      str(err))
3961

    
3962
  def _AbortMigration(self):
3963
    """Call the hypervisor code to abort a started migration.
3964

3965
    """
3966
    instance = self.instance
3967
    target_node = self.target_node
3968
    migration_info = self.migration_info
3969

    
3970
    abort_result = self.rpc.call_finalize_migration(target_node,
3971
                                                    instance,
3972
                                                    migration_info,
3973
                                                    False)
3974
    abort_msg = abort_result.RemoteFailMsg()
3975
    if abort_msg:
3976
      logging.error("Aborting migration failed on target node %s: %s" %
3977
                    (target_node, abort_msg))
3978
      # Don't raise an exception here, as we still have to try to revert the
3979
      # disk status, even if this step failed.
3980

    
3981
  def _ExecMigration(self):
3982
    """Migrate an instance.
3983

3984
    The migrate is done by:
3985
      - change the disks into dual-master mode
3986
      - wait until disks are fully synchronized again
3987
      - migrate the instance
3988
      - change disks on the new secondary node (the old primary) to secondary
3989
      - wait until disks are fully synchronized
3990
      - change disks into single-master mode
3991

3992
    """
3993
    instance = self.instance
3994
    target_node = self.target_node
3995
    source_node = self.source_node
3996

    
3997
    self.feedback_fn("* checking disk consistency between source and target")
3998
    for dev in instance.disks:
3999
      if not _CheckDiskConsistency(self, dev, target_node, False):
4000
        raise errors.OpExecError("Disk %s is degraded or not fully"
4001
                                 " synchronized on target node,"
4002
                                 " aborting migrate." % dev.iv_name)
4003

    
4004
    # First get the migration information from the remote node
4005
    result = self.rpc.call_migration_info(source_node, instance)
4006
    msg = result.RemoteFailMsg()
4007
    if msg:
4008
      log_err = ("Failed fetching source migration information from %s: %s" %
4009
                 (source_node, msg))
4010
      logging.error(log_err)
4011
      raise errors.OpExecError(log_err)
4012

    
4013
    self.migration_info = migration_info = result.payload
4014

    
4015
    # Then switch the disks to master/master mode
4016
    self._EnsureSecondary(target_node)
4017
    self._GoStandalone()
4018
    self._GoReconnect(True)
4019
    self._WaitUntilSync()
4020

    
4021
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4022
    result = self.rpc.call_accept_instance(target_node,
4023
                                           instance,
4024
                                           migration_info,
4025
                                           self.nodes_ip[target_node])
4026

    
4027
    msg = result.RemoteFailMsg()
4028
    if msg:
4029
      logging.error("Instance pre-migration failed, trying to revert"
4030
                    " disk status: %s", msg)
4031
      self._AbortMigration()
4032
      self._RevertDiskStatus()
4033
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4034
                               (instance.name, msg))
4035

    
4036
    self.feedback_fn("* migrating instance to %s" % target_node)
4037
    time.sleep(10)
4038
    result = self.rpc.call_instance_migrate(source_node, instance,
4039
                                            self.nodes_ip[target_node],
4040
                                            self.op.live)
4041
    msg = result.RemoteFailMsg()
4042
    if msg:
4043
      logging.error("Instance migration failed, trying to revert"
4044
                    " disk status: %s", msg)
4045
      self._AbortMigration()
4046
      self._RevertDiskStatus()
4047
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4048
                               (instance.name, msg))
4049
    time.sleep(10)
4050

    
4051
    instance.primary_node = target_node
4052
    # distribute new instance config to the other nodes
4053
    self.cfg.Update(instance)
4054

    
4055
    result = self.rpc.call_finalize_migration(target_node,
4056
                                              instance,
4057
                                              migration_info,
4058
                                              True)
4059
    msg = result.RemoteFailMsg()
4060
    if msg:
4061
      logging.error("Instance migration succeeded, but finalization failed:"
4062
                    " %s" % msg)
4063
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4064
                               msg)
4065

    
4066
    self._EnsureSecondary(source_node)
4067
    self._WaitUntilSync()
4068
    self._GoStandalone()
4069
    self._GoReconnect(False)
4070
    self._WaitUntilSync()
4071

    
4072
    self.feedback_fn("* done")
4073

    
4074
  def Exec(self, feedback_fn):
4075
    """Perform the migration.
4076

4077
    """
4078
    self.feedback_fn = feedback_fn
4079

    
4080
    self.source_node = self.instance.primary_node
4081
    self.target_node = self.instance.secondary_nodes[0]
4082
    self.all_nodes = [self.source_node, self.target_node]
4083
    self.nodes_ip = {
4084
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4085
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4086
      }
4087
    if self.op.cleanup:
4088
      return self._ExecCleanup()
4089
    else:
4090
      return self._ExecMigration()
4091
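
# Illustrative sketch (not part of the original cmdlib.py): _ExecMigration
# above is a fixed sequence of DRBD state changes wrapped around the actual
# hypervisor migration, and _WaitUntilSync polls the nodes until every disk
# reports that it is in sync.  Both are summarized here with hypothetical
# callables; on failure the real code additionally runs _AbortMigration and
# _RevertDiskStatus.
_SKETCH_MIGRATION_STEPS = [
  "check disk consistency on the target node",
  "fetch migration info from the source node",
  "switch disks to dual-master (secondary -> standalone -> connect)",
  "wait until disks are fully synchronized",
  "prepare the target node (call_accept_instance)",
  "migrate the instance (call_instance_migrate)",
  "update primary_node in the configuration",
  "finalize on the target (call_finalize_migration)",
  "demote the old primary and go back to single-master",
  "wait until disks are fully synchronized again",
  ]

def _sketch_wait_until_sync(poll_fn, sleep_fn, report_fn):
  """Poll until every node reports (done, percent) with done == True."""
  while True:
    results = poll_fn()                      # {node: (done, percent)}
    if all(done for (done, _) in results.values()):
      return
    percents = [pct for (_, pct) in results.values() if pct is not None]
    if percents:
      report_fn("progress: %.1f%%" % min(percents))
    sleep_fn(2)                              # mirrors the 2 second pause above

# e.g. _sketch_wait_until_sync(lambda: {"n1": (True, 100)}, time.sleep,
#                              logging.info) returns immediately.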

    
4092

    
4093
def _CreateBlockDev(lu, node, instance, device, force_create,
4094
                    info, force_open):
4095
  """Create a tree of block devices on a given node.
4096

4097
  If this device type has to be created on secondaries, create it and
4098
  all its children.
4099

4100
  If not, just recurse to children keeping the same 'force' value.
4101

4102
  @param lu: the lu on whose behalf we execute
4103
  @param node: the node on which to create the device
4104
  @type instance: L{objects.Instance}
4105
  @param instance: the instance which owns the device
4106
  @type device: L{objects.Disk}
4107
  @param device: the device to create
4108
  @type force_create: boolean
4109
  @param force_create: whether to force creation of this device; this
4110
      will be changed to True whenever we find a device which has
4111
      CreateOnSecondary() attribute
4112
  @param info: the extra 'metadata' we should attach to the device
4113
      (this will be represented as a LVM tag)
4114
  @type force_open: boolean
4115
  @param force_open: this parameter will be passed to the
4116
      L{backend.BlockdevCreate} function where it specifies
4117
      whether we run on primary or not, and it affects both
4118
      the child assembly and the device own Open() execution
4119

4120
  """
4121
  if device.CreateOnSecondary():
4122
    force_create = True
4123

    
4124
  if device.children:
4125
    for child in device.children:
4126
      _CreateBlockDev(lu, node, instance, child, force_create,
4127
                      info, force_open)
4128

    
4129
  if not force_create:
4130
    return
4131

    
4132
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
4133
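
# Illustrative sketch (not part of the original cmdlib.py): _CreateBlockDev
# above walks the disk tree depth-first, forcing creation from the first
# device that must also exist on secondary nodes, and creates children before
# their parent.  A stand-alone model of that traversal (the class and the
# `create` callback are hypothetical):
class _SketchDev(object):
  def __init__(self, name, create_on_secondary=False, children=()):
    self.name = name
    self.create_on_secondary = create_on_secondary
    self.children = list(children)

def _sketch_create_tree(dev, force_create, create):
  if dev.create_on_secondary:
    force_create = True
  for child in dev.children:
    _sketch_create_tree(child, force_create, create)
  if force_create:
    create(dev)

# For a DRBD8-like device with two LV children the callback fires for the
# children first and then for the parent:
#   created = []
#   drbd = _SketchDev("drbd0", create_on_secondary=True,
#                     children=[_SketchDev("data"), _SketchDev("meta")])
#   _sketch_create_tree(drbd, False, lambda d: created.append(d.name))
#   created -> ['data', 'meta', 'drbd0']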

    
4134

    
4135
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
4136
  """Create a single block device on a given node.
4137

4138
  This will not recurse over children of the device, so they must be
4139
  created in advance.
4140

4141
  @param lu: the lu on whose behalf we execute
4142
  @param node: the node on which to create the device
4143
  @type instance: L{objects.Instance}
4144
  @param instance: the instance which owns the device
4145
  @type device: L{objects.Disk}
4146
  @param device: the device to create
4147
  @param info: the extra 'metadata' we should attach to the device
4148
      (this will be represented as a LVM tag)
4149
  @type force_open: boolean
4150
  @param force_open: this parameter will be passed to the
4151
      L{backend.BlockdevCreate} function where it specifies
4152
      whether we run on primary or not, and it affects both
4153
      the child assembly and the device own Open() execution
4154

4155
  """
4156
  lu.cfg.SetDiskID(device, node)
4157
  result = lu.rpc.call_blockdev_create(node, device, device.size,
4158
                                       instance.name, force_open, info)
4159
  msg = result.RemoteFailMsg()
4160
  if msg:
4161
    raise errors.OpExecError("Can't create block device %s on"
4162
                             " node %s for instance %s: %s" %
4163
                             (device, node, instance.name, msg))
4164
  if device.physical_id is None:
4165
    device.physical_id = result.payload
4166

    
4167

    
4168
def _GenerateUniqueNames(lu, exts):
4169
  """Generate a suitable LV name.
4170

4171
  This will generate unique logical volume names for the given instance.
4172

4173
  """
4174
  results = []
4175
  for val in exts:
4176
    new_id = lu.cfg.GenerateUniqueID()
4177
    results.append("%s%s" % (new_id, val))
4178
  return results
4179

    
4180

    
4181
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
4182
                         p_minor, s_minor):
4183
  """Generate a drbd8 device complete with its children.
4184

4185
  """
4186
  port = lu.cfg.AllocatePort()
4187
  vgname = lu.cfg.GetVGName()
4188
  shared_secret = lu.cfg.GenerateDRBDSecret()
4189
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
4190
                          logical_id=(vgname, names[0]))
4191
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
4192
                          logical_id=(vgname, names[1]))
4193
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
4194
                          logical_id=(primary, secondary, port,
4195
                                      p_minor, s_minor,
4196
                                      shared_secret),
4197
                          children=[dev_data, dev_meta],
4198
                          iv_name=iv_name)
4199
  return drbd_dev
4200

    
4201

    
4202
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % i
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


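# Illustrative note (a sketch, not executed anywhere): for a two-disk DRBD8
# request the helper above allocates four DRBD minors (one per node per
# disk) and builds one data/meta LV name pair per disk, so the resulting
# children are roughly
#
#   <unique>.disk0_data / <unique>.disk0_meta   -> DRBD device "disk/0"
#   <unique>.disk1_data / <unique>.disk1_meta   -> DRBD device "disk/1"
#
# where "<unique>" stands for whatever unique identifier
# _GenerateUniqueNames adds to the ".disk%d" suffix, and the iv_name
# indices are offset by base_index for callers that append disks to an
# existing instance.

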
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    if result.failed or not result.data:
      raise errors.OpExecError("Could not connect to node '%s'" % pnode)

    if not result.data[0]:
      raise errors.OpExecError("Failed to create directory '%s'" %
                               file_storage_dir)

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


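# Callers are expected to pair _CreateDisks with _RemoveDisks for rollback,
# as LUCreateInstance.Exec does further down in this module.  A minimal
# sketch of that idiom (variable names are illustrative only):
#
#   try:
#     _CreateDisks(self, iobj)
#   except errors.OpExecError:
#     self.LogWarning("Device creation failed, reverting...")
#     try:
#       _RemoveDisks(self, iobj)
#     finally:
#       self.cfg.ReleaseDRBDMinors(iobj.name)
#       raise

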
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    if result.failed or not result.data:
      logging.error("Could not remove directory '%s'", file_storage_dir)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


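# Illustrative example of the sizing above (values in MiB, chosen only for
# the sake of the example): a two-disk DRBD8 instance with 10240 MiB and
# 2048 MiB disks needs 10240 + 128 + 2048 + 128 = 12544 MiB of free space
# in the volume group on each of the two nodes, while the same disks with
# the plain template need only 12288 MiB on the primary node:
#
#   _ComputeDiskSize(constants.DT_DRBD8,
#                    [{"size": 10240}, {"size": 2048}])   # -> 12544
#   _ComputeDiskSize(constants.DT_PLAIN,
#                    [{"size": 10240}, {"size": 2048}])   # -> 12288
#   _ComputeDiskSize(constants.DT_FILE, [{"size": 10240}])  # -> None

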
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    msg = info.RemoteFailMsg()
    if msg:
      raise errors.OpPrereqError("Hypervisor parameter validation"
                                 " failed on node %s: %s" % (node, msg))


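# _CheckHVParams only covers the remote (per-node) validation; callers such
# as LUCreateInstance first check the parameter syntax locally and only then
# call this helper with the node list, e.g. (a sketch based on the code
# further down in this module):
#
#   utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
#   filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
#                                 self.op.hvparams)
#   hypervisor.GetHypervisor(self.op.hypervisor).CheckParameterSyntax(
#     filled_hvp)
#   ...
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)

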
class LUCreateInstance(LogicalUnit):
4403
  """Create an instance.
4404

4405
  """
4406
  HPATH = "instance-add"
4407
  HTYPE = constants.HTYPE_INSTANCE
4408
  _OP_REQP = ["instance_name", "disks", "disk_template",
4409
              "mode", "start",
4410
              "wait_for_sync", "ip_check", "nics",
4411
              "hvparams", "beparams"]
4412
  REQ_BGL = False
4413

    
4414
  def _ExpandNode(self, node):
4415
    """Expands and checks one node name.
4416

4417
    """
4418
    node_full = self.cfg.ExpandNodeName(node)
4419
    if node_full is None:
4420
      raise errors.OpPrereqError("Unknown node %s" % node)
4421
    return node_full
4422

    
4423
  def ExpandNames(self):
4424
    """ExpandNames for CreateInstance.
4425

4426
    Figure out the right locks for instance creation.
4427

4428
    """
4429
    self.needed_locks = {}
4430

    
4431
    # set optional parameters to none if they don't exist
4432
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4433
      if not hasattr(self.op, attr):
4434
        setattr(self.op, attr, None)
4435

    
4436
    # cheap checks, mostly valid constants given
4437

    
4438
    # verify creation mode
4439
    if self.op.mode not in (constants.INSTANCE_CREATE,
4440
                            constants.INSTANCE_IMPORT):
4441
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4442
                                 self.op.mode)
4443

    
4444
    # disk template and mirror node verification
4445
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4446
      raise errors.OpPrereqError("Invalid disk template name")
4447

    
4448
    if self.op.hypervisor is None:
4449
      self.op.hypervisor = self.cfg.GetHypervisorType()
4450

    
4451
    cluster = self.cfg.GetClusterInfo()
4452
    enabled_hvs = cluster.enabled_hypervisors
4453
    if self.op.hypervisor not in enabled_hvs:
4454
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4455
                                 " cluster (%s)" % (self.op.hypervisor,
4456
                                  ",".join(enabled_hvs)))
4457

    
4458
    # check hypervisor parameter syntax (locally)
4459
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4460
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4461
                                  self.op.hvparams)
4462
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4463
    hv_type.CheckParameterSyntax(filled_hvp)
4464

    
4465
    # fill and remember the beparams dict
4466
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4467
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4468
                                    self.op.beparams)
4469

    
4470
    #### instance parameters check
4471

    
4472
    # instance name verification
4473
    hostname1 = utils.HostInfo(self.op.instance_name)
4474
    self.op.instance_name = instance_name = hostname1.name
4475

    
4476
    # this is just a preventive check, but someone might still add this
4477
    # instance in the meantime, and creation will fail at lock-add time
4478
    if instance_name in self.cfg.GetInstanceList():
4479
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4480
                                 instance_name)
4481

    
4482
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4483

    
4484
    # NIC buildup
4485
    self.nics = []
4486
    for idx, nic in enumerate(self.op.nics):
4487
      nic_mode_req = nic.get("mode", None)
4488
      nic_mode = nic_mode_req
4489
      if nic_mode is None:
4490
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4491

    
4492
      # in routed mode, for the first nic, the default ip is 'auto'
4493
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4494
        default_ip_mode = constants.VALUE_AUTO
4495
      else:
4496
        default_ip_mode = constants.VALUE_NONE
4497

    
4498
      # ip validity checks
4499
      ip = nic.get("ip", default_ip_mode)
4500
      if ip is None or ip.lower() == constants.VALUE_NONE:
4501
        nic_ip = None
4502
      elif ip.lower() == constants.VALUE_AUTO:
4503
        nic_ip = hostname1.ip
4504
      else:
4505
        if not utils.IsValidIP(ip):
4506
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4507
                                     " like a valid IP" % ip)
4508
        nic_ip = ip
4509

    
4510
      # TODO: check the ip for uniqueness !!
4511
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4512
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4513

    
4514
      # MAC address verification
4515
      mac = nic.get("mac", constants.VALUE_AUTO)
4516
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4517
        if not utils.IsValidMac(mac.lower()):
4518
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4519
                                     mac)
4520
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge
4529

    
4530
      nicparams = {}
4531
      if nic_mode_req:
4532
        nicparams[constants.NIC_MODE] = nic_mode_req
4533
      if link:
4534
        nicparams[constants.NIC_LINK] = link
4535

    
4536
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4537
                                      nicparams)
4538
      objects.NIC.CheckParameterSyntax(check_params)
4539
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
4540
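      # For example (an illustrative mapping, not executed here): a NIC
      # submitted as {"mac": "auto", "bridge": "xen-br0"} in bridged mode
      # ends up with link="xen-br0", i.e. nicparams becomes
      # {constants.NIC_LINK: "xen-br0"}; the MAC itself is only generated
      # later, in CheckPrereq, so that the allocator and the hooks see the
      # real value instead of 'auto'.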

    
4541
    # disk checks/pre-build
4542
    self.disks = []
4543
    for disk in self.op.disks:
4544
      mode = disk.get("mode", constants.DISK_RDWR)
4545
      if mode not in constants.DISK_ACCESS_SET:
4546
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4547
                                   mode)
4548
      size = disk.get("size", None)
4549
      if size is None:
4550
        raise errors.OpPrereqError("Missing disk size")
4551
      try:
4552
        size = int(size)
4553
      except ValueError:
4554
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4555
      self.disks.append({"size": size, "mode": mode})
4556

    
4557
    # used in CheckPrereq for ip ping check
4558
    self.check_ip = hostname1.ip
4559

    
4560
    # file storage checks
4561
    if (self.op.file_driver and
4562
        not self.op.file_driver in constants.FILE_DRIVER):
4563
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4564
                                 self.op.file_driver)
4565

    
4566
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4567
      raise errors.OpPrereqError("File storage directory path not absolute")
4568

    
4569
    ### Node/iallocator related checks
4570
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4571
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4572
                                 " node must be given")
4573

    
4574
    if self.op.iallocator:
4575
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4576
    else:
4577
      self.op.pnode = self._ExpandNode(self.op.pnode)
4578
      nodelist = [self.op.pnode]
4579
      if self.op.snode is not None:
4580
        self.op.snode = self._ExpandNode(self.op.snode)
4581
        nodelist.append(self.op.snode)
4582
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4583

    
4584
    # in case of import lock the source node too
4585
    if self.op.mode == constants.INSTANCE_IMPORT:
4586
      src_node = getattr(self.op, "src_node", None)
4587
      src_path = getattr(self.op, "src_path", None)
4588

    
4589
      if src_path is None:
4590
        self.op.src_path = src_path = self.op.instance_name
4591

    
4592
      if src_node is None:
4593
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4594
        self.op.src_node = None
4595
        if os.path.isabs(src_path):
4596
          raise errors.OpPrereqError("Importing an instance from an absolute"
4597
                                     " path requires a source node option.")
4598
      else:
4599
        self.op.src_node = src_node = self._ExpandNode(src_node)
4600
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4601
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4602
        if not os.path.isabs(src_path):
4603
          self.op.src_path = src_path = \
4604
            os.path.join(constants.EXPORT_DIR, src_path)
4605

    
4606
    else: # INSTANCE_CREATE
4607
      if getattr(self.op, "os_type", None) is None:
4608
        raise errors.OpPrereqError("No guest OS specified")
4609

    
4610
  def _RunAllocator(self):
4611
    """Run the allocator based on input opcode.
4612

4613
    """
4614
    nics = [n.ToDict() for n in self.nics]
4615
    ial = IAllocator(self,
4616
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4617
                     name=self.op.instance_name,
4618
                     disk_template=self.op.disk_template,
4619
                     tags=[],
4620
                     os=self.op.os_type,
4621
                     vcpus=self.be_full[constants.BE_VCPUS],
4622
                     mem_size=self.be_full[constants.BE_MEMORY],
4623
                     disks=self.disks,
4624
                     nics=nics,
4625
                     hypervisor=self.op.hypervisor,
4626
                     )
4627

    
4628
    ial.Run(self.op.iallocator)
4629

    
4630
    if not ial.success:
4631
      raise errors.OpPrereqError("Can't compute nodes using"
4632
                                 " iallocator '%s': %s" % (self.op.iallocator,
4633
                                                           ial.info))
4634
    if len(ial.nodes) != ial.required_nodes:
4635
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4636
                                 " of nodes (%s), required %s" %
4637
                                 (self.op.iallocator, len(ial.nodes),
4638
                                  ial.required_nodes))
4639
    self.op.pnode = ial.nodes[0]
4640
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4641
                 self.op.instance_name, self.op.iallocator,
4642
                 ", ".join(ial.nodes))
4643
    if ial.required_nodes == 2:
4644
      self.op.snode = ial.nodes[1]
4645
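    # Note: for templates that need a secondary (DRBD8) the allocator
    # returns two node names, primary first and secondary second; for
    # plain/file/diskless templates it returns a single node, which
    # becomes the primary.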

    
4646
  def BuildHooksEnv(self):
4647
    """Build hooks env.
4648

4649
    This runs on master, primary and secondary nodes of the instance.
4650

4651
    """
4652
    env = {
4653
      "ADD_MODE": self.op.mode,
4654
      }
4655
    if self.op.mode == constants.INSTANCE_IMPORT:
4656
      env["SRC_NODE"] = self.op.src_node
4657
      env["SRC_PATH"] = self.op.src_path
4658
      env["SRC_IMAGES"] = self.src_images
4659

    
4660
    env.update(_BuildInstanceHookEnv(
4661
      name=self.op.instance_name,
4662
      primary_node=self.op.pnode,
4663
      secondary_nodes=self.secondaries,
4664
      status=self.op.start,
4665
      os_type=self.op.os_type,
4666
      memory=self.be_full[constants.BE_MEMORY],
4667
      vcpus=self.be_full[constants.BE_VCPUS],
4668
      nics=_PreBuildNICHooksList(self, self.nics),
4669
      disk_template=self.op.disk_template,
4670
      disks=[(d["size"], d["mode"]) for d in self.disks],
4671
    ))
4672

    
4673
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4674
          self.secondaries)
4675
    return env, nl, nl
4676

    
4677

    
4678
  def CheckPrereq(self):
4679
    """Check prerequisites.
4680

4681
    """
4682
    if (not self.cfg.GetVGName() and
4683
        self.op.disk_template not in constants.DTS_NOT_LVM):
4684
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4685
                                 " instances")
4686

    
4687
    if self.op.mode == constants.INSTANCE_IMPORT:
4688
      src_node = self.op.src_node
4689
      src_path = self.op.src_path
4690

    
4691
      if src_node is None:
4692
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
4693
        exp_list = self.rpc.call_export_list(locked_nodes)
4694
        found = False
4695
        for node in exp_list:
4696
          if exp_list[node].RemoteFailMsg():
4697
            continue
4698
          if src_path in exp_list[node].payload:
4699
            found = True
4700
            self.op.src_node = src_node = node
4701
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4702
                                                       src_path)
4703
            break
4704
        if not found:
4705
          raise errors.OpPrereqError("No export found for relative path %s" %
4706
                                      src_path)
4707

    
4708
      _CheckNodeOnline(self, src_node)
4709
      result = self.rpc.call_export_info(src_node, src_path)
4710
      msg = result.RemoteFailMsg()
4711
      if msg:
4712
        raise errors.OpPrereqError("No export or invalid export found in"
4713
                                   " dir %s: %s" % (src_path, msg))
4714

    
4715
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
4716
      if not export_info.has_section(constants.INISECT_EXP):
4717
        raise errors.ProgrammerError("Corrupted export config")
4718

    
4719
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4720
      if (int(ei_version) != constants.EXPORT_VERSION):
4721
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4722
                                   (ei_version, constants.EXPORT_VERSION))
4723

    
4724
      # Check that the new instance doesn't have less disks than the export
4725
      instance_disks = len(self.disks)
4726
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4727
      if instance_disks < export_disks:
4728
        raise errors.OpPrereqError("Not enough disks to import."
4729
                                   " (instance: %d, export: %d)" %
4730
                                   (instance_disks, export_disks))
4731

    
4732
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4733
      disk_images = []
4734
      for idx in range(export_disks):
4735
        option = 'disk%d_dump' % idx
4736
        if export_info.has_option(constants.INISECT_INS, option):
4737
          # FIXME: are the old os-es, disk sizes, etc. useful?
4738
          export_name = export_info.get(constants.INISECT_INS, option)
4739
          image = os.path.join(src_path, export_name)
4740
          disk_images.append(image)
4741
        else:
4742
          disk_images.append(False)
4743

    
4744
      self.src_images = disk_images
4745

    
4746
      old_name = export_info.get(constants.INISECT_INS, 'name')
4747
      # FIXME: int() here could throw a ValueError on broken exports
4748
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4749
      if self.op.instance_name == old_name:
4750
        for idx, nic in enumerate(self.nics):
4751
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4752
            nic_mac_ini = 'nic%d_mac' % idx
4753
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4754

    
4755
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4756
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4757
    if self.op.start and not self.op.ip_check:
4758
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4759
                                 " adding an instance in start mode")
4760

    
4761
    if self.op.ip_check:
4762
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4763
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4764
                                   (self.check_ip, self.op.instance_name))
4765

    
4766
    #### mac address generation
4767
    # By generating here the mac address both the allocator and the hooks get
4768
    # the real final mac address rather than the 'auto' or 'generate' value.
4769
    # There is a race condition between the generation and the instance object
4770
    # creation, which means that we know the mac is valid now, but we're not
4771
    # sure it will be when we actually add the instance. If things go bad
4772
    # adding the instance will abort because of a duplicate mac, and the
4773
    # creation job will fail.
4774
    for nic in self.nics:
4775
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4776
        nic.mac = self.cfg.GenerateMAC()
4777

    
4778
    #### allocator run
4779

    
4780
    if self.op.iallocator is not None:
4781
      self._RunAllocator()
4782

    
4783
    #### node related checks
4784

    
4785
    # check primary node
4786
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4787
    assert self.pnode is not None, \
4788
      "Cannot retrieve locked node %s" % self.op.pnode
4789
    if pnode.offline:
4790
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4791
                                 pnode.name)
4792
    if pnode.drained:
4793
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4794
                                 pnode.name)
4795

    
4796
    self.secondaries = []
4797

    
4798
    # mirror node verification
4799
    if self.op.disk_template in constants.DTS_NET_MIRROR:
4800
      if self.op.snode is None:
4801
        raise errors.OpPrereqError("The networked disk templates need"
4802
                                   " a mirror node")
4803
      if self.op.snode == pnode.name:
4804
        raise errors.OpPrereqError("The secondary node cannot be"
4805
                                   " the primary node.")
4806
      _CheckNodeOnline(self, self.op.snode)
4807
      _CheckNodeNotDrained(self, self.op.snode)
4808
      self.secondaries.append(self.op.snode)
4809

    
4810
    nodenames = [pnode.name] + self.secondaries
4811

    
4812
    req_size = _ComputeDiskSize(self.op.disk_template,
4813
                                self.disks)
4814

    
4815
    # Check lv size requirements
4816
    if req_size is not None:
4817
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4818
                                         self.op.hypervisor)
4819
      for node in nodenames:
4820
        info = nodeinfo[node]
4821
        msg = info.RemoteFailMsg()
4822
        if msg:
4823
          raise errors.OpPrereqError("Cannot get current information"
4824
                                     " from node %s: %s" % (node, msg))
4825
        info = info.payload
4826
        vg_free = info.get('vg_free', None)
4827
        if not isinstance(vg_free, int):
4828
          raise errors.OpPrereqError("Can't compute free disk space on"
4829
                                     " node %s" % node)
4830
        if req_size > vg_free:
4831
          raise errors.OpPrereqError("Not enough disk space on target node %s."
4832
                                     " %d MB available, %d MB required" %
4833
                                     (node, vg_free, req_size))
4834

    
4835
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4836

    
4837
    # os verification
4838
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4839
    result.Raise()
4840
    if not isinstance(result.data, objects.OS):
4841
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
4842
                                 " primary node"  % self.op.os_type)
4843

    
4844
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
4845

    
4846
    # memory check on primary node
4847
    if self.op.start:
4848
      _CheckNodeFreeMemory(self, self.pnode.name,
4849
                           "creating instance %s" % self.op.instance_name,
4850
                           self.be_full[constants.BE_MEMORY],
4851
                           self.op.hypervisor)
4852

    
4853
  def Exec(self, feedback_fn):
4854
    """Create and add the instance to the cluster.
4855

4856
    """
4857
    instance = self.op.instance_name
4858
    pnode_name = self.pnode.name
4859

    
4860
    ht_kind = self.op.hypervisor
4861
    if ht_kind in constants.HTS_REQ_PORT:
4862
      network_port = self.cfg.AllocatePort()
4863
    else:
4864
      network_port = None
4865

    
4866
    ##if self.op.vnc_bind_address is None:
4867
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
4868

    
4869
    # this is needed because os.path.join does not accept None arguments
4870
    if self.op.file_storage_dir is None:
4871
      string_file_storage_dir = ""
4872
    else:
4873
      string_file_storage_dir = self.op.file_storage_dir
4874

    
4875
    # build the full file storage dir path
4876
    file_storage_dir = os.path.normpath(os.path.join(
4877
                                        self.cfg.GetFileStorageDir(),
4878
                                        string_file_storage_dir, instance))
4879
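    # For instance (illustrative values only; the cluster-wide directory is
    # whatever self.cfg.GetFileStorageDir() returns, e.g. the usual
    # /srv/ganeti/file-storage): with no file_storage_dir given the result
    # is "<cluster_dir>/inst1.example.com", and a relative "mydir" yields
    # "<cluster_dir>/mydir/inst1.example.com".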

    
4880

    
4881
    disks = _GenerateDiskTemplate(self,
4882
                                  self.op.disk_template,
4883
                                  instance, pnode_name,
4884
                                  self.secondaries,
4885
                                  self.disks,
4886
                                  file_storage_dir,
4887
                                  self.op.file_driver,
4888
                                  0)
4889

    
4890
    iobj = objects.Instance(name=instance, os=self.op.os_type,
4891
                            primary_node=pnode_name,
4892
                            nics=self.nics, disks=disks,
4893
                            disk_template=self.op.disk_template,
4894
                            admin_up=False,
4895
                            network_port=network_port,
4896
                            beparams=self.op.beparams,
4897
                            hvparams=self.op.hvparams,
4898
                            hypervisor=self.op.hypervisor,
4899
                            )
4900

    
4901
    feedback_fn("* creating instance disks...")
4902
    try:
4903
      _CreateDisks(self, iobj)
4904
    except errors.OpExecError:
4905
      self.LogWarning("Device creation failed, reverting...")
4906
      try:
4907
        _RemoveDisks(self, iobj)
4908
      finally:
4909
        self.cfg.ReleaseDRBDMinors(instance)
4910
        raise
4911

    
4912
    feedback_fn("adding instance %s to cluster config" % instance)
4913

    
4914
    self.cfg.AddInstance(iobj)
4915
    # Declare that we don't want to remove the instance lock anymore, as we've
4916
    # added the instance to the config
4917
    del self.remove_locks[locking.LEVEL_INSTANCE]
4918
    # Unlock all the nodes
4919
    if self.op.mode == constants.INSTANCE_IMPORT:
4920
      nodes_keep = [self.op.src_node]
4921
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
4922
                       if node != self.op.src_node]
4923
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
4924
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
4925
    else:
4926
      self.context.glm.release(locking.LEVEL_NODE)
4927
      del self.acquired_locks[locking.LEVEL_NODE]
4928

    
4929
    if self.op.wait_for_sync:
4930
      disk_abort = not _WaitForSync(self, iobj)
4931
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
4932
      # make sure the disks are not degraded (still sync-ing is ok)
4933
      time.sleep(15)
4934
      feedback_fn("* checking mirrors status")
4935
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
4936
    else:
4937
      disk_abort = False
4938

    
4939
    if disk_abort:
4940
      _RemoveDisks(self, iobj)
4941
      self.cfg.RemoveInstance(iobj.name)
4942
      # Make sure the instance lock gets removed
4943
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
4944
      raise errors.OpExecError("There are some degraded disks for"
4945
                               " this instance")
4946

    
4947
    feedback_fn("creating os for instance %s on node %s" %
4948
                (instance, pnode_name))
4949

    
4950
    if iobj.disk_template != constants.DT_DISKLESS:
4951
      if self.op.mode == constants.INSTANCE_CREATE:
4952
        feedback_fn("* running the instance OS create scripts...")
4953
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
4954
        msg = result.RemoteFailMsg()
4955
        if msg:
4956
          raise errors.OpExecError("Could not add os for instance %s"
4957
                                   " on node %s: %s" %
4958
                                   (instance, pnode_name, msg))
4959

    
4960
      elif self.op.mode == constants.INSTANCE_IMPORT:
4961
        feedback_fn("* running the instance OS import scripts...")
4962
        src_node = self.op.src_node
4963
        src_images = self.src_images
4964
        cluster_name = self.cfg.GetClusterName()
4965
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
4966
                                                         src_node, src_images,
4967
                                                         cluster_name)
4968
        msg = import_result.RemoteFailMsg()
4969
        if msg:
4970
          self.LogWarning("Error while importing the disk images for instance"
4971
                          " %s on node %s: %s" % (instance, pnode_name, msg))
4972
      else:
4973
        # also checked in the prereq part
4974
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
4975
                                     % self.op.mode)
4976

    
4977
    if self.op.start:
4978
      iobj.admin_up = True
4979
      self.cfg.Update(iobj)
4980
      logging.info("Starting instance %s on node %s", instance, pnode_name)
4981
      feedback_fn("* starting instance...")
4982
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
4983
      msg = result.RemoteFailMsg()
4984
      if msg:
4985
        raise errors.OpExecError("Could not start instance: %s" % msg)
4986

    
4987

    
4988
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    msg = node_insts.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Can't get node information from %s: %s" %
                               (node, msg))

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


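# The value returned above is the command line the caller (gnt-instance
# console) execs on the master node; conceptually it is an
# "ssh -t root@<primary node> '<console command>'" invocation, with the
# exact arguments determined by ssh.SshRunner.BuildCmd and by the
# hypervisor's GetShellCommandForConsole (for Xen-style hypervisors this is
# typically something like "xm console <instance>"; the string is
# hypervisor-specific and the example here is only illustrative).

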
class LUReplaceDisks(LogicalUnit):
5045
  """Replace the disks of an instance.
5046

5047
  """
5048
  HPATH = "mirrors-replace"
5049
  HTYPE = constants.HTYPE_INSTANCE
5050
  _OP_REQP = ["instance_name", "mode", "disks"]
5051
  REQ_BGL = False
5052

    
5053
  def CheckArguments(self):
5054
    if not hasattr(self.op, "remote_node"):
5055
      self.op.remote_node = None
5056
    if not hasattr(self.op, "iallocator"):
5057
      self.op.iallocator = None
5058

    
5059
    # check for valid parameter combination
5060
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
5061
    if self.op.mode == constants.REPLACE_DISK_CHG:
5062
      if cnt == 2:
5063
        raise errors.OpPrereqError("When changing the secondary either an"
5064
                                   " iallocator script must be used or the"
5065
                                   " new node given")
5066
      elif cnt == 0:
5067
        raise errors.OpPrereqError("Give either the iallocator or the new"
5068
                                   " secondary, not both")
5069
    else: # not replacing the secondary
5070
      if cnt != 2:
5071
        raise errors.OpPrereqError("The iallocator and new node options can"
5072
                                   " be used only when changing the"
5073
                                   " secondary node")
5074
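  # Summary of the combinations accepted above:
  #   - mode == constants.REPLACE_DISK_CHG: exactly one of remote_node or
  #     iallocator must be given (either name the new secondary directly,
  #     or let a script pick it)
  #   - any other mode: neither remote_node nor iallocator may be given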

    
5075
  def ExpandNames(self):
5076
    self._ExpandAndLockInstance()
5077

    
5078
    if self.op.iallocator is not None:
5079
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5080
    elif self.op.remote_node is not None:
5081
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5082
      if remote_node is None:
5083
        raise errors.OpPrereqError("Node '%s' not known" %
5084
                                   self.op.remote_node)
5085
      self.op.remote_node = remote_node
5086
      # Warning: do not remove the locking of the new secondary here
5087
      # unless DRBD8.AddChildren is changed to work in parallel;
5088
      # currently it doesn't since parallel invocations of
5089
      # FindUnusedMinor will conflict
5090
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5091
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5092
    else:
5093
      self.needed_locks[locking.LEVEL_NODE] = []
5094
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5095

    
5096
  def DeclareLocks(self, level):
5097
    # If we're not already locking all nodes in the set we have to declare the
5098
    # instance's primary/secondary nodes.
5099
    if (level == locking.LEVEL_NODE and
5100
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5101
      self._LockInstancesNodes()
5102

    
5103
  def _RunAllocator(self):
5104
    """Compute a new secondary node using an IAllocator.
5105

5106
    """
5107
    ial = IAllocator(self,
5108
                     mode=constants.IALLOCATOR_MODE_RELOC,
5109
                     name=self.op.instance_name,
5110
                     relocate_from=[self.sec_node])
5111

    
5112
    ial.Run(self.op.iallocator)
5113

    
5114
    if not ial.success:
5115
      raise errors.OpPrereqError("Can't compute nodes using"
5116
                                 " iallocator '%s': %s" % (self.op.iallocator,
5117
                                                           ial.info))
5118
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
5122
    self.op.remote_node = ial.nodes[0]
5123
    self.LogInfo("Selected new secondary for the instance: %s",
5124
                 self.op.remote_node)
5125

    
5126
  def BuildHooksEnv(self):
5127
    """Build hooks env.
5128

5129
    This runs on the master, the primary and all the secondaries.
5130

5131
    """
5132
    env = {
5133
      "MODE": self.op.mode,
5134
      "NEW_SECONDARY": self.op.remote_node,
5135
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
5136
      }
5137
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5138
    nl = [
5139
      self.cfg.GetMasterNode(),
5140
      self.instance.primary_node,
5141
      ]
5142
    if self.op.remote_node is not None:
5143
      nl.append(self.op.remote_node)
5144
    return env, nl, nl
5145

    
5146
  def CheckPrereq(self):
5147
    """Check prerequisites.
5148

5149
    This checks that the instance is in the cluster.
5150

5151
    """
5152
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5153
    assert instance is not None, \
5154
      "Cannot retrieve locked instance %s" % self.op.instance_name
5155
    self.instance = instance
5156

    
5157
    if instance.disk_template != constants.DT_DRBD8:
5158
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5159
                                 " instances")
5160

    
5161
    if len(instance.secondary_nodes) != 1:
5162
      raise errors.OpPrereqError("The instance has a strange layout,"
5163
                                 " expected one secondary but found %d" %
5164
                                 len(instance.secondary_nodes))
5165

    
5166
    self.sec_node = instance.secondary_nodes[0]
5167

    
5168
    if self.op.iallocator is not None:
5169
      self._RunAllocator()
5170

    
5171
    remote_node = self.op.remote_node
5172
    if remote_node is not None:
5173
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5174
      assert self.remote_node_info is not None, \
5175
        "Cannot retrieve locked node %s" % remote_node
5176
    else:
5177
      self.remote_node_info = None
5178
    if remote_node == instance.primary_node:
5179
      raise errors.OpPrereqError("The specified node is the primary node of"
5180
                                 " the instance.")
5181
    elif remote_node == self.sec_node:
5182
      raise errors.OpPrereqError("The specified node is already the"
5183
                                 " secondary node of the instance.")
5184

    
5185
    if self.op.mode == constants.REPLACE_DISK_PRI:
5186
      n1 = self.tgt_node = instance.primary_node
5187
      n2 = self.oth_node = self.sec_node
5188
    elif self.op.mode == constants.REPLACE_DISK_SEC:
5189
      n1 = self.tgt_node = self.sec_node
5190
      n2 = self.oth_node = instance.primary_node
5191
    elif self.op.mode == constants.REPLACE_DISK_CHG:
5192
      n1 = self.new_node = remote_node
5193
      n2 = self.oth_node = instance.primary_node
5194
      self.tgt_node = self.sec_node
5195
      _CheckNodeNotDrained(self, remote_node)
5196
    else:
5197
      raise errors.ProgrammerError("Unhandled disk replace mode")
5198

    
5199
    _CheckNodeOnline(self, n1)
5200
    _CheckNodeOnline(self, n2)
5201

    
5202
    if not self.op.disks:
5203
      self.op.disks = range(len(instance.disks))
5204

    
5205
    for disk_idx in self.op.disks:
5206
      instance.FindDisk(disk_idx)
5207

    
5208
  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for drbd8.
5210

5211
    The algorithm for replace is quite complicated:
5212

5213
      1. for each disk to be replaced:
5214

5215
        1. create new LVs on the target node with unique names
5216
        1. detach old LVs from the drbd device
5217
        1. rename old LVs to name_replaced.<time_t>
5218
        1. rename new LVs to old LVs
5219
        1. attach the new LVs (with the old names now) to the drbd device
5220

5221
      1. wait for sync across all devices
5222

5223
      1. for each modified disk:
5224

5225
        1. remove old LVs (which have the name name_replaces.<time_t>)
5226

5227
    Failures are not very well handled.
5228

5229
    """
5230
    steps_total = 6
5231
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5232
    instance = self.instance
5233
    iv_names = {}
5234
    vgname = self.cfg.GetVGName()
5235
    # start of work
5236
    cfg = self.cfg
5237
    tgt_node = self.tgt_node
5238
    oth_node = self.oth_node
5239

    
5240
    # Step: check device activation
5241
    self.proc.LogStep(1, steps_total, "check device existence")
5242
    info("checking volume groups")
5243
    my_vg = cfg.GetVGName()
5244
    results = self.rpc.call_vg_list([oth_node, tgt_node])
5245
    if not results:
5246
      raise errors.OpExecError("Can't list volume groups on the nodes")
5247
    for node in oth_node, tgt_node:
5248
      res = results[node]
5249
      msg = res.RemoteFailMsg()
5250
      if msg:
5251
        raise errors.OpExecError("Error checking node %s: %s" % (node, msg))
5252
      if my_vg not in res.payload:
5253
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5254
                                 (my_vg, node))
5255
    for idx, dev in enumerate(instance.disks):
5256
      if idx not in self.op.disks:
5257
        continue
5258
      for node in tgt_node, oth_node:
5259
        info("checking disk/%d on %s" % (idx, node))
5260
        cfg.SetDiskID(dev, node)
5261
        result = self.rpc.call_blockdev_find(node, dev)
5262
        msg = result.RemoteFailMsg()
5263
        if not msg and not result.payload:
5264
          msg = "disk not found"
5265
        if msg:
5266
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5267
                                   (idx, node, msg))
5268

    
5269
    # Step: check other node consistency
5270
    self.proc.LogStep(2, steps_total, "check peer consistency")
5271
    for idx, dev in enumerate(instance.disks):
5272
      if idx not in self.op.disks:
5273
        continue
5274
      info("checking disk/%d consistency on %s" % (idx, oth_node))
5275
      if not _CheckDiskConsistency(self, dev, oth_node,
5276
                                   oth_node==instance.primary_node):
5277
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
5278
                                 " to replace disks on this node (%s)" %
5279
                                 (oth_node, tgt_node))
5280

    
5281
    # Step: create new storage
5282
    self.proc.LogStep(3, steps_total, "allocate new storage")
5283
    for idx, dev in enumerate(instance.disks):
5284
      if idx not in self.op.disks:
5285
        continue
5286
      size = dev.size
5287
      cfg.SetDiskID(dev, tgt_node)
5288
      lv_names = [".disk%d_%s" % (idx, suf)
5289
                  for suf in ["data", "meta"]]
5290
      names = _GenerateUniqueNames(self, lv_names)
5291
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5292
                             logical_id=(vgname, names[0]))
5293
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5294
                             logical_id=(vgname, names[1]))
5295
      new_lvs = [lv_data, lv_meta]
5296
      old_lvs = dev.children
5297
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5298
      info("creating new local storage on %s for %s" %
5299
           (tgt_node, dev.iv_name))
5300
      # we pass force_create=True to force the LVM creation
5301
      for new_lv in new_lvs:
5302
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
5303
                        _GetInstanceInfoText(instance), False)
5304

    
5305
    # Step: for each lv, detach+rename*2+attach
5306
    self.proc.LogStep(4, steps_total, "change drbd configuration")
5307
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5308
      info("detaching %s drbd from local storage" % dev.iv_name)
5309
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
5310
      msg = result.RemoteFailMsg()
5311
      if msg:
5312
        raise errors.OpExecError("Can't detach drbd from local storage on node"
5313
                                 " %s for device %s: %s" %
5314
                                 (tgt_node, dev.iv_name, msg))
5315
      #dev.children = []
5316
      #cfg.Update(instance)
5317

    
5318
      # ok, we created the new LVs, so now we know we have the needed
5319
      # storage; as such, we proceed on the target node to rename
5320
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5321
      # using the assumption that logical_id == physical_id (which in
5322
      # turn is the unique_id on that node)
5323

    
5324
      # FIXME(iustin): use a better name for the replaced LVs
5325
      temp_suffix = int(time.time())
5326
      ren_fn = lambda d, suff: (d.physical_id[0],
5327
                                d.physical_id[1] + "_replaced-%s" % suff)
5328
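      # e.g. an LV named "<unique>.disk0_data" is renamed to
      # "<unique>.disk0_data_replaced-1234567890" (the suffix being the
      # current epoch time; the number here is only an example), which
      # frees the original name for the freshly created LV.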
      # build the rename list based on what LVs exist on the node
5329
      rlist = []
5330
      for to_ren in old_lvs:
5331
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
5332
        if not result.RemoteFailMsg() and result.payload:
5333
          # device exists
5334
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
5335

    
5336
      info("renaming the old LVs on the target node")
5337
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5338
      msg = result.RemoteFailMsg()
5339
      if msg:
5340
        raise errors.OpExecError("Can't rename old LVs on node %s: %s" %
5341
                                 (tgt_node, msg))
5342
      # now we rename the new LVs to the old LVs
5343
      info("renaming the new LVs on the target node")
5344
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
5345
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5346
      msg = result.RemoteFailMsg()
5347
      if msg:
5348
        raise errors.OpExecError("Can't rename new LVs on node %s: %s" %
5349
                                 (tgt_node, msg))
5350

    
5351
      for old, new in zip(old_lvs, new_lvs):
5352
        new.logical_id = old.logical_id
5353
        cfg.SetDiskID(new, tgt_node)
5354

    
5355
      for disk in old_lvs:
5356
        disk.logical_id = ren_fn(disk, temp_suffix)
5357
        cfg.SetDiskID(disk, tgt_node)
5358

    
5359
      # now that the new lvs have the old name, we can add them to the device
5360
      info("adding new mirror component on %s" % tgt_node)
5361
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
5362
      msg = result.RemoteFailMsg()
5363
      if msg:
5364
        for new_lv in new_lvs:
5365
          msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
5366
          if msg:
5367
            warning("Can't rollback device %s: %s", dev, msg,
5368
                    hint="cleanup manually the unused logical volumes")
5369
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5370

    
5371
      dev.children = new_lvs
5372
      cfg.Update(instance)
5373

    
5374
    # Step: wait for sync
5375

    
5376
    # this can fail as the old devices are degraded and _WaitForSync
5377
    # does a combined result over all disks, so we don't check its
5378
    # return value
5379
    self.proc.LogStep(5, steps_total, "sync devices")
5380
    _WaitForSync(self, instance, unlock=True)
5381

    
5382
    # so check manually all the devices
5383
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5384
      cfg.SetDiskID(dev, instance.primary_node)
5385
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
5386
      msg = result.RemoteFailMsg()
5387
      if not msg and not result.payload:
5388
        msg = "disk not found"
5389
      if msg:
5390
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5391
                                 (name, msg))
5392
      if result.payload[5]:
5393
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5394

    
5395
    # Step: remove old storage
5396
    self.proc.LogStep(6, steps_total, "removing old storage")
5397
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5398
      info("remove logical volumes for %s" % name)
5399
      for lv in old_lvs:
5400
        cfg.SetDiskID(lv, tgt_node)
5401
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
5402
        if msg:
5403
          warning("Can't remove old LV: %s" % msg,
5404
                  hint="manually remove unused LVs")
5405
          continue
5406

    
5407
  def _ExecD8Secondary(self, feedback_fn):
5408
    """Replace the secondary node for drbd8.
5409

5410
    The algorithm for replace is quite complicated:
5411
      - for all disks of the instance:
5412
        - create new LVs on the new node with same names
5413
        - shutdown the drbd device on the old secondary
5414
        - disconnect the drbd network on the primary
5415
        - create the drbd device on the new secondary
5416
        - network attach the drbd on the primary, using an artifice:
5417
          the drbd code for Attach() will connect to the network if it
5418
          finds a device which is connected to the good local disks but
5419
          not network enabled
5420
      - wait for sync across all devices
5421
      - remove all disks from the old secondary
5422

5423
    Failures are not very well handled.
5424

5425
    """
5426
    steps_total = 6
5427
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5428
    instance = self.instance
5429
    iv_names = {}
5430
    # start of work
5431
    cfg = self.cfg
5432
    old_node = self.tgt_node
5433
    new_node = self.new_node
5434
    pri_node = instance.primary_node
5435
    nodes_ip = {
5436
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
5437
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
5438
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
5439
      }
5440

    
5441
    # Step: check device activation
5442
    self.proc.LogStep(1, steps_total, "check device existence")
5443
    info("checking volume groups")
5444
    my_vg = cfg.GetVGName()
5445
    results = self.rpc.call_vg_list([pri_node, new_node])
5446
    for node in pri_node, new_node:
5447
      res = results[node]
5448
      msg = res.RemoteFailMsg()
5449
      if msg:
5450
        raise errors.OpExecError("Error checking node %s: %s" % (node, msg))
5451
      if my_vg not in res.payload:
5452
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5453
                                 (my_vg, node))
5454
    for idx, dev in enumerate(instance.disks):
5455
      if idx not in self.op.disks:
5456
        continue
5457
      info("checking disk/%d on %s" % (idx, pri_node))
5458
      cfg.SetDiskID(dev, pri_node)
5459
      result = self.rpc.call_blockdev_find(pri_node, dev)
5460
      msg = result.RemoteFailMsg()
5461
      if not msg and not result.payload:
5462
        msg = "disk not found"
5463
      if msg:
5464
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5465
                                 (idx, pri_node, msg))
5466

    
5467
    # Step: check other node consistency
5468
    self.proc.LogStep(2, steps_total, "check peer consistency")
5469
    for idx, dev in enumerate(instance.disks):
5470
      if idx not in self.op.disks:
5471
        continue
5472
      info("checking disk/%d consistency on %s" % (idx, pri_node))
5473
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
5474
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
5475
                                 " unsafe to replace the secondary" %
5476
                                 pri_node)
5477

    
5478
    # Step: create new storage
5479
    self.proc.LogStep(3, steps_total, "allocate new storage")
5480
    for idx, dev in enumerate(instance.disks):
5481
      info("adding new local storage on %s for disk/%d" %
5482
           (new_node, idx))
5483
      # we pass force_create=True to force LVM creation
5484
      for new_lv in dev.children:
5485
        _CreateBlockDev(self, new_node, instance, new_lv, True,
5486
                        _GetInstanceInfoText(instance), False)
5487

    
5488
    # Step 4: drbd minors and drbd setup changes
5489
    # after this, we must manually remove the drbd minors on both the
5490
    # error and the success paths
5491
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
5492
                                   instance.name)
5493
    logging.debug("Allocated minors %s" % (minors,))
5494
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
5495
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
5496
      size = dev.size
5497
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
5498
      # create new devices on new_node; note that we create two IDs:
5499
      # one without port, so the drbd will be activated without
5500
      # networking information on the new node at this stage, and one
5501
      # with network, for the latter activation in step 4
5502
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5503
      if pri_node == o_node1:
5504
        p_minor = o_minor1
5505
      else:
5506
        p_minor = o_minor2
5507

    
5508
      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
5509
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
5510
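      # The only difference between the two IDs is the port: new_alone_id
      # (port=None) lets the new DRBD come up standalone in this step,
      # while new_net_id (with the original port) is what gets written to
      # the configuration and used when the primary re-attaches over the
      # network in the next step.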

    
5511
      iv_names[idx] = (dev, dev.children, new_net_id)
5512
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5513
                    new_net_id)
5514
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5515
                              logical_id=new_alone_id,
5516
                              children=dev.children)
5517
      try:
5518
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
5519
                              _GetInstanceInfoText(instance), False)
5520
      except errors.GenericError:
5521
        self.cfg.ReleaseDRBDMinors(instance.name)
5522
        raise
5523

    
5524
    for idx, dev in enumerate(instance.disks):
5525
      # we have new devices, shutdown the drbd on the old secondary
5526
      info("shutting down drbd for disk/%d on old node" % idx)
5527
      cfg.SetDiskID(dev, old_node)
5528
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
5529
      if msg:
5530
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
5531
                (idx, msg),
5532
                hint="Please cleanup this device manually as soon as possible")
5533

    
5534
    info("detaching primary drbds from the network (=> standalone)")
5535
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
5536
                                               instance.disks)[pri_node]
5537

    
5538
    msg = result.RemoteFailMsg()
5539
    if msg:
5540
      # detaches didn't succeed (unlikely)
5541
      self.cfg.ReleaseDRBDMinors(instance.name)
5542
      raise errors.OpExecError("Can't detach the disks from the network on"
5543
                               " old node: %s" % (msg,))
5544

    
5545
    # if we managed to detach at least one, we update all the disks of
5546
    # the instance to point to the new secondary
5547
    info("updating instance configuration")
5548
    for dev, _, new_logical_id in iv_names.itervalues():
5549
      dev.logical_id = new_logical_id
5550
      cfg.SetDiskID(dev, pri_node)
5551
    cfg.Update(instance)
5552

    
5553
    # and now perform the drbd attach
5554
    info("attaching primary drbds to new secondary (standalone => connected)")
5555
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
5556
                                           instance.disks, instance.name,
5557
                                           False)
5558
    for to_node, to_result in result.items():
5559
      msg = to_result.RemoteFailMsg()
5560
      if msg:
5561
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
5562
                hint="please do a gnt-instance info to see the"
5563
                " status of disks")
5564

    
5565
    # this can fail as the old devices are degraded and _WaitForSync
5566
    # does a combined result over all disks, so we don't check its
5567
    # return value
5568
    self.proc.LogStep(5, steps_total, "sync devices")
5569
    _WaitForSync(self, instance, unlock=True)
5570

    
5571
    # so check manually all the devices
5572
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5573
      cfg.SetDiskID(dev, pri_node)
5574
      result = self.rpc.call_blockdev_find(pri_node, dev)
5575
      msg = result.RemoteFailMsg()
5576
      if not msg and not result.payload:
5577
        msg = "disk not found"
5578
      if msg:
5579
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
5580
                                 (idx, msg))
5581
      if result.payload[5]:
5582
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
5583

    
5584
    self.proc.LogStep(6, steps_total, "removing old storage")
5585
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5586
      info("remove logical volumes for disk/%d" % idx)
5587
      for lv in old_lvs:
5588
        cfg.SetDiskID(lv, old_node)
5589
        msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
5590
        if msg:
5591
          warning("Can't remove LV on old secondary: %s", msg,
5592
                  hint="Cleanup stale volumes by hand")
5593

    
5594
  def Exec(self, feedback_fn):
5595
    """Execute disk replacement.
5596

5597
    This dispatches the disk replacement to the appropriate handler.
5598

5599
    """
5600
    instance = self.instance
5601

    
5602
    # Activate the instance disks if we're replacing them on a down instance
5603
    if not instance.admin_up:
5604
      _StartInstanceDisks(self, instance, True)
5605

    
5606
    if self.op.mode == constants.REPLACE_DISK_CHG:
5607
      fn = self._ExecD8Secondary
5608
    else:
5609
      fn = self._ExecD8DiskOnly
5610

    
5611
    ret = fn(feedback_fn)
5612

    
5613
    # Deactivate the instance disks if we're replacing them on a down instance
5614
    if not instance.admin_up:
5615
      _SafeShutdownInstanceDisks(self, instance)
5616

    
5617
    return ret
5618

    
5619

    
5620
class LUGrowDisk(LogicalUnit):
5621
  """Grow a disk of an instance.
5622

5623
  """
5624
  HPATH = "disk-grow"
5625
  HTYPE = constants.HTYPE_INSTANCE
5626
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5627
  REQ_BGL = False
5628

    
5629
  def ExpandNames(self):
5630
    self._ExpandAndLockInstance()
5631
    self.needed_locks[locking.LEVEL_NODE] = []
5632
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5633

    
5634
  def DeclareLocks(self, level):
5635
    if level == locking.LEVEL_NODE:
5636
      self._LockInstancesNodes()
5637

    
5638
  def BuildHooksEnv(self):
5639
    """Build hooks env.
5640

5641
    This runs on the master, the primary and all the secondaries.
5642

5643
    """
5644
    env = {
5645
      "DISK": self.op.disk,
5646
      "AMOUNT": self.op.amount,
5647
      }
5648
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5649
    nl = [
5650
      self.cfg.GetMasterNode(),
5651
      self.instance.primary_node,
5652
      ]
5653
    return env, nl, nl
5654

    
5655
  def CheckPrereq(self):
5656
    """Check prerequisites.
5657

5658
    This checks that the instance is in the cluster.
5659

5660
    """
5661
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5662
    assert instance is not None, \
5663
      "Cannot retrieve locked instance %s" % self.op.instance_name
5664
    nodenames = list(instance.all_nodes)
5665
    for node in nodenames:
5666
      _CheckNodeOnline(self, node)
5667

    
5668

    
5669
    self.instance = instance
5670

    
5671
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5672
      raise errors.OpPrereqError("Instance's disk layout does not support"
5673
                                 " growing.")
5674

    
5675
    self.disk = instance.FindDisk(self.op.disk)
5676

    
5677
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5678
                                       instance.hypervisor)
5679
    for node in nodenames:
5680
      info = nodeinfo[node]
5681
      msg = info.RemoteFailMsg()
5682
      if msg:
5683
        raise errors.OpPrereqError("Cannot get current information"
                                   " from node %s: %s" % (node, msg))
5685
      vg_free = info.payload.get('vg_free', None)
5686
      if not isinstance(vg_free, int):
5687
        raise errors.OpPrereqError("Can't compute free disk space on"
5688
                                   " node %s" % node)
5689
      if self.op.amount > vg_free:
5690
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5691
                                   " %d MiB available, %d MiB required" %
5692
                                   (node, vg_free, self.op.amount))
5693

    
5694
  def Exec(self, feedback_fn):
5695
    """Execute disk grow.
5696

5697
    """
5698
    instance = self.instance
5699
    disk = self.disk
5700
    for node in instance.all_nodes:
5701
      self.cfg.SetDiskID(disk, node)
5702
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5703
      msg = result.RemoteFailMsg()
5704
      if msg:
5705
        raise errors.OpExecError("Grow request failed to node %s: %s" %
5706
                                 (node, msg))
5707
    disk.RecordGrow(self.op.amount)
5708
    self.cfg.Update(instance)
5709
    if self.op.wait_for_sync:
5710
      disk_abort = not _WaitForSync(self, instance)
5711
      if disk_abort:
5712
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5713
                             " status.\nPlease check the instance.")
5714

    
5715

    
5716
class LUQueryInstanceData(NoHooksLU):
5717
  """Query runtime instance data.
5718

5719
  """
5720
  _OP_REQP = ["instances", "static"]
5721
  REQ_BGL = False
5722

    
5723
  def ExpandNames(self):
5724
    self.needed_locks = {}
5725
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
5726

    
5727
    if not isinstance(self.op.instances, list):
5728
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5729

    
5730
    if self.op.instances:
5731
      self.wanted_names = []
5732
      for name in self.op.instances:
5733
        full_name = self.cfg.ExpandInstanceName(name)
5734
        if full_name is None:
5735
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5736
        self.wanted_names.append(full_name)
5737
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5738
    else:
5739
      self.wanted_names = None
5740
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5741

    
5742
    self.needed_locks[locking.LEVEL_NODE] = []
5743
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5744

    
5745
  def DeclareLocks(self, level):
5746
    if level == locking.LEVEL_NODE:
5747
      self._LockInstancesNodes()
5748

    
5749
  def CheckPrereq(self):
5750
    """Check prerequisites.
5751

5752
    This only checks the optional instance list against the existing names.
5753

5754
    """
5755
    if self.wanted_names is None:
5756
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5757

    
5758
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5759
                             in self.wanted_names]
5760
    return
5761

    
5762
  def _ComputeDiskStatus(self, instance, snode, dev):
5763
    """Compute block device status.
5764

5765
    """
    static = self.op.static
5767
    if not static:
5768
      self.cfg.SetDiskID(dev, instance.primary_node)
5769
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5770
      if dev_pstatus.offline:
5771
        dev_pstatus = None
5772
      else:
5773
        msg = dev_pstatus.RemoteFailMsg()
5774
        if msg:
5775
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5776
                                   (instance.name, msg))
5777
        dev_pstatus = dev_pstatus.payload
5778
    else:
5779
      dev_pstatus = None
5780

    
5781
    if dev.dev_type in constants.LDS_DRBD:
5782
      # we change the snode then (otherwise we use the one passed in)
5783
      if dev.logical_id[0] == instance.primary_node:
5784
        snode = dev.logical_id[1]
5785
      else:
5786
        snode = dev.logical_id[0]
5787

    
5788
    if snode and not static:
5789
      self.cfg.SetDiskID(dev, snode)
5790
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5791
      if dev_sstatus.offline:
5792
        dev_sstatus = None
5793
      else:
5794
        msg = dev_sstatus.RemoteFailMsg()
5795
        if msg:
5796
          raise errors.OpExecError("Can't compute disk status for %s: %s" %
5797
                                   (instance.name, msg))
5798
        dev_sstatus = dev_sstatus.payload
5799
    else:
5800
      dev_sstatus = None
5801

    
5802
    if dev.children:
5803
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5804
                      for child in dev.children]
5805
    else:
5806
      dev_children = []
5807

    
5808
    data = {
5809
      "iv_name": dev.iv_name,
5810
      "dev_type": dev.dev_type,
5811
      "logical_id": dev.logical_id,
5812
      "physical_id": dev.physical_id,
5813
      "pstatus": dev_pstatus,
5814
      "sstatus": dev_sstatus,
5815
      "children": dev_children,
5816
      "mode": dev.mode,
5817
      }
5818

    
5819
    return data
5820

    
5821
  def Exec(self, feedback_fn):
5822
    """Gather and return data"""
5823
    result = {}
5824

    
5825
    cluster = self.cfg.GetClusterInfo()
5826

    
5827
    for instance in self.wanted_instances:
5828
      if not self.op.static:
5829
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5830
                                                  instance.name,
5831
                                                  instance.hypervisor)
5832
        msg = remote_info.RemoteFailMsg()
5833
        if msg:
5834
          raise errors.OpExecError("Error checking node %s: %s" %
5835
                                   (instance.primary_node, msg))
5836
        remote_info = remote_info.payload
5837
        if remote_info and "state" in remote_info:
5838
          remote_state = "up"
5839
        else:
5840
          remote_state = "down"
5841
      else:
5842
        remote_state = None
5843
      if instance.admin_up:
5844
        config_state = "up"
5845
      else:
5846
        config_state = "down"
5847

    
5848
      disks = [self._ComputeDiskStatus(instance, None, device)
5849
               for device in instance.disks]
5850

    
5851
      idict = {
5852
        "name": instance.name,
5853
        "config_state": config_state,
5854
        "run_state": remote_state,
5855
        "pnode": instance.primary_node,
5856
        "snodes": instance.secondary_nodes,
5857
        "os": instance.os,
5858
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
5859
        "disks": disks,
5860
        "hypervisor": instance.hypervisor,
5861
        "network_port": instance.network_port,
5862
        "hv_instance": instance.hvparams,
5863
        "hv_actual": cluster.FillHV(instance),
5864
        "be_instance": instance.beparams,
5865
        "be_actual": cluster.FillBE(instance),
5866
        }
5867

    
5868
      result[instance.name] = idict
5869

    
5870
    return result
5871

    
5872

    
5873
class LUSetInstanceParams(LogicalUnit):
5874
  """Modifies an instances's parameters.
5875

5876
  """
5877
  HPATH = "instance-modify"
5878
  HTYPE = constants.HTYPE_INSTANCE
5879
  _OP_REQP = ["instance_name"]
5880
  REQ_BGL = False
5881

    
5882
  def CheckArguments(self):
5883
    if not hasattr(self.op, 'nics'):
5884
      self.op.nics = []
5885
    if not hasattr(self.op, 'disks'):
5886
      self.op.disks = []
5887
    if not hasattr(self.op, 'beparams'):
5888
      self.op.beparams = {}
5889
    if not hasattr(self.op, 'hvparams'):
5890
      self.op.hvparams = {}
5891
    self.op.force = getattr(self.op, "force", False)
5892
    if not (self.op.nics or self.op.disks or
5893
            self.op.hvparams or self.op.beparams):
5894
      raise errors.OpPrereqError("No changes submitted")
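    # self.op.disks and self.op.nics are lists of (op, params) pairs, where op
    # is constants.DDM_ADD, constants.DDM_REMOVE or an integer index of an
    # existing device, and params is a dict (e.g. {'size': ..., 'mode': ...}
    # for disks); the checks below only validate them syntactically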

    
    # Disk validation
5897
    disk_addremove = 0
5898
    for disk_op, disk_dict in self.op.disks:
5899
      if disk_op == constants.DDM_REMOVE:
5900
        disk_addremove += 1
5901
        continue
5902
      elif disk_op == constants.DDM_ADD:
5903
        disk_addremove += 1
5904
      else:
5905
        if not isinstance(disk_op, int):
5906
          raise errors.OpPrereqError("Invalid disk index")
5907
      if disk_op == constants.DDM_ADD:
5908
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5909
        if mode not in constants.DISK_ACCESS_SET:
5910
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5911
        size = disk_dict.get('size', None)
5912
        if size is None:
5913
          raise errors.OpPrereqError("Required disk parameter size missing")
5914
        try:
5915
          size = int(size)
5916
        except ValueError, err:
5917
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5918
                                     str(err))
5919
        disk_dict['size'] = size
5920
      else:
5921
        # modification of disk
5922
        if 'size' in disk_dict:
5923
          raise errors.OpPrereqError("Disk size change not possible, use"
5924
                                     " grow-disk")
5925

    
5926
    if disk_addremove > 1:
5927
      raise errors.OpPrereqError("Only one disk add or remove operation"
5928
                                 " supported at a time")
5929

    
5930
    # NIC validation
5931
    nic_addremove = 0
5932
    for nic_op, nic_dict in self.op.nics:
5933
      if nic_op == constants.DDM_REMOVE:
5934
        nic_addremove += 1
5935
        continue
5936
      elif nic_op == constants.DDM_ADD:
5937
        nic_addremove += 1
5938
      else:
5939
        if not isinstance(nic_op, int):
5940
          raise errors.OpPrereqError("Invalid nic index")
5941

    
5942
      # nic_dict should be a dict
5943
      nic_ip = nic_dict.get('ip', None)
5944
      if nic_ip is not None:
5945
        if nic_ip.lower() == constants.VALUE_NONE:
5946
          nic_dict['ip'] = None
5947
        else:
5948
          if not utils.IsValidIP(nic_ip):
5949
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5950

    
5951
      nic_bridge = nic_dict.get('bridge', None)
5952
      nic_link = nic_dict.get('link', None)
5953
      if nic_bridge and nic_link:
5954
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
5955
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
5956
        nic_dict['bridge'] = None
5957
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
5958
        nic_dict['link'] = None
5959

    
5960
      if nic_op == constants.DDM_ADD:
5961
        nic_mac = nic_dict.get('mac', None)
5962
        if nic_mac is None:
5963
          nic_dict['mac'] = constants.VALUE_AUTO
5964

    
5965
      if 'mac' in nic_dict:
5966
        nic_mac = nic_dict['mac']
5967
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5968
          if not utils.IsValidMac(nic_mac):
5969
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5970
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
5971
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
5972
                                     " modifying an existing nic")
5973

    
5974
    if nic_addremove > 1:
5975
      raise errors.OpPrereqError("Only one NIC add or remove operation"
5976
                                 " supported at a time")
5977

    
5978
  def ExpandNames(self):
5979
    self._ExpandAndLockInstance()
5980
    self.needed_locks[locking.LEVEL_NODE] = []
5981
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5982

    
5983
  def DeclareLocks(self, level):
5984
    if level == locking.LEVEL_NODE:
5985
      self._LockInstancesNodes()
5986

    
5987
  def BuildHooksEnv(self):
5988
    """Build hooks env.
5989

5990
    This runs on the master, primary and secondaries.
5991

5992
    """
5993
    args = dict()
5994
    if constants.BE_MEMORY in self.be_new:
5995
      args['memory'] = self.be_new[constants.BE_MEMORY]
5996
    if constants.BE_VCPUS in self.be_new:
5997
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
5998
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
5999
    # information at all.
6000
    if self.op.nics:
6001
      args['nics'] = []
6002
      nic_override = dict(self.op.nics)
6003
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6004
      for idx, nic in enumerate(self.instance.nics):
6005
        if idx in nic_override:
6006
          this_nic_override = nic_override[idx]
6007
        else:
6008
          this_nic_override = {}
6009
        if 'ip' in this_nic_override:
6010
          ip = this_nic_override['ip']
6011
        else:
6012
          ip = nic.ip
6013
        if 'mac' in this_nic_override:
6014
          mac = this_nic_override['mac']
6015
        else:
6016
          mac = nic.mac
6017
        if idx in self.nic_pnew:
6018
          nicparams = self.nic_pnew[idx]
6019
        else:
6020
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6021
        mode = nicparams[constants.NIC_MODE]
6022
        link = nicparams[constants.NIC_LINK]
6023
        args['nics'].append((ip, mac, mode, link))
6024
      if constants.DDM_ADD in nic_override:
6025
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6026
        mac = nic_override[constants.DDM_ADD]['mac']
6027
        nicparams = self.nic_pnew[constants.DDM_ADD]
6028
        mode = nicparams[constants.NIC_MODE]
6029
        link = nicparams[constants.NIC_LINK]
6030
        args['nics'].append((ip, mac, mode, link))
6031
      elif constants.DDM_REMOVE in nic_override:
6032
        del args['nics'][-1]
6033

    
6034
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6035
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6036
    return env, nl, nl
6037

    
6038
  def _GetUpdatedParams(self, old_params, update_dict,
6039
                        default_values, parameter_types):
6040
    """Return the new params dict for the given params.
6041

6042
    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
6048
    @type default_values: dict
6049
    @param default_values: default values for the filled parameters
6050
    @type parameter_types: dict
6051
    @param parameter_types: dict mapping target dict keys to types
6052
                            in constants.ENFORCEABLE_TYPES
6053
    @rtype: (dict, dict)
6054
    @return: (new_parameters, filled_parameters)
6055

6056
    """
    params_copy = copy.deepcopy(old_params)
6058
    for key, val in update_dict.iteritems():
6059
      if val == constants.VALUE_DEFAULT:
6060
        try:
6061
          del params_copy[key]
6062
        except KeyError:
6063
          pass
6064
      else:
6065
        params_copy[key] = val
6066
    utils.ForceDictType(params_copy, parameter_types)
6067
    params_filled = objects.FillDict(default_values, params_copy)
6068
    return (params_copy, params_filled)
6069

    
6070
  def CheckPrereq(self):
6071
    """Check prerequisites.
6072

6073
    This only checks the instance list against the existing names.
6074

6075
    """
6076
    force = self.force = self.op.force
6077

    
6078
    # checking the new params on the primary/secondary nodes
6079

    
6080
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6081
    cluster = self.cluster = self.cfg.GetClusterInfo()
6082
    assert self.instance is not None, \
6083
      "Cannot retrieve locked instance %s" % self.op.instance_name
6084
    pnode = instance.primary_node
6085
    nodelist = list(instance.all_nodes)
6086

    
6087
    # hvparams processing
6088
    if self.op.hvparams:
6089
      i_hvdict, hv_new = self._GetUpdatedParams(
6090
                             instance.hvparams, self.op.hvparams,
6091
                             cluster.hvparams[instance.hypervisor],
6092
                             constants.HVS_PARAMETER_TYPES)
6093
      # local check
6094
      hypervisor.GetHypervisor(
6095
        instance.hypervisor).CheckParameterSyntax(hv_new)
6096
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6097
      self.hv_new = hv_new # the new actual values
6098
      self.hv_inst = i_hvdict # the new dict (without defaults)
6099
    else:
6100
      self.hv_new = self.hv_inst = {}
6101

    
6102
    # beparams processing
6103
    if self.op.beparams:
6104
      i_bedict, be_new = self._GetUpdatedParams(
6105
                             instance.beparams, self.op.beparams,
6106
                             cluster.beparams[constants.PP_DEFAULT],
6107
                             constants.BES_PARAMETER_TYPES)
6108
      self.be_new = be_new # the new actual values
6109
      self.be_inst = i_bedict # the new dict (without defaults)
6110
    else:
6111
      self.be_new = self.be_inst = {}
6112

    
6113
    self.warn = []
6114

    
6115
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6116
      mem_check_list = [pnode]
6117
      if be_new[constants.BE_AUTO_BALANCE]:
6118
        # either we changed auto_balance to yes or it was from before
6119
        mem_check_list.extend(instance.secondary_nodes)
6120
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6121
                                                  instance.hypervisor)
6122
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6123
                                         instance.hypervisor)
6124
      pninfo = nodeinfo[pnode]
6125
      msg = pninfo.RemoteFailMsg()
6126
      if msg:
6127
        # Assume the primary node is unreachable and go ahead
6128
        self.warn.append("Can't get info from primary node %s: %s" %
6129
                         (pnode, msg))
6130
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6131
        self.warn.append("Node data from primary node %s doesn't contain"
6132
                         " free memory information" % pnode)
6133
      elif instance_info.RemoteFailMsg():
6134
        self.warn.append("Can't get instance runtime information: %s" %
6135
                        instance_info.RemoteFailMsg())
6136
      else:
6137
        if instance_info.payload:
6138
          current_mem = int(instance_info.payload['memory'])
6139
        else:
6140
          # Assume instance not running
6141
          # (there is a slight race condition here, but it's not very probable,
6142
          # and we have no other way to check)
6143
          current_mem = 0
6144
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6145
                    pninfo.payload['memory_free'])
6146
        if miss_mem > 0:
6147
          raise errors.OpPrereqError("This change will prevent the instance"
6148
                                     " from starting, due to %d MB of memory"
6149
                                     " missing on its primary node" % miss_mem)
6150

    
6151
      if be_new[constants.BE_AUTO_BALANCE]:
6152
        for node, nres in nodeinfo.items():
6153
          if node not in instance.secondary_nodes:
6154
            continue
6155
          msg = nres.RemoteFailMsg()
6156
          if msg:
6157
            self.warn.append("Can't get info from secondary node %s: %s" %
6158
                             (node, msg))
6159
          elif not isinstance(nres.payload.get('memory_free', None), int):
6160
            self.warn.append("Secondary node %s didn't return free"
6161
                             " memory information" % node)
6162
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6163
            self.warn.append("Not enough memory to failover instance to"
6164
                             " secondary node %s" % node)
6165

    
6166
    # NIC processing
6167
    self.nic_pnew = {}
6168
    self.nic_pinst = {}
6169
    for nic_op, nic_dict in self.op.nics:
6170
      if nic_op == constants.DDM_REMOVE:
6171
        if not instance.nics:
6172
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6173
        continue
6174
      if nic_op != constants.DDM_ADD:
6175
        # an existing nic
6176
        if nic_op < 0 or nic_op >= len(instance.nics):
6177
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6178
                                     " are 0 to %d" %
6179
                                     (nic_op, len(instance.nics)))
6180
        old_nic_params = instance.nics[nic_op].nicparams
6181
        old_nic_ip = instance.nics[nic_op].ip
6182
      else:
6183
        old_nic_params = {}
6184
        old_nic_ip = None
6185

    
6186
      update_params_dict = dict([(key, nic_dict[key])
6187
                                 for key in constants.NICS_PARAMETERS
6188
                                 if key in nic_dict])
6189

    
6190
      if 'bridge' in nic_dict:
6191
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6192

    
6193
      new_nic_params, new_filled_nic_params = \
6194
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6195
                                 cluster.nicparams[constants.PP_DEFAULT],
6196
                                 constants.NICS_PARAMETER_TYPES)
6197
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6198
      self.nic_pinst[nic_op] = new_nic_params
6199
      self.nic_pnew[nic_op] = new_filled_nic_params
6200
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6201

    
6202
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6203
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6204
        result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
6205
        msg = result.RemoteFailMsg()
6206
        if msg:
6207
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6208
          if self.force:
6209
            self.warn.append(msg)
6210
          else:
6211
            raise errors.OpPrereqError(msg)
6212
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6213
        if 'ip' in nic_dict:
6214
          nic_ip = nic_dict['ip']
6215
        else:
6216
          nic_ip = old_nic_ip
6217
        if nic_ip is None:
6218
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6219
                                     ' on a routed nic')
6220
      if 'mac' in nic_dict:
6221
        nic_mac = nic_dict['mac']
6222
        if nic_mac is None:
6223
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6224
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6225
          # otherwise generate the mac
6226
          nic_dict['mac'] = self.cfg.GenerateMAC()
6227
        else:
6228
          # or validate/reserve the current one
6229
          if self.cfg.IsMacInUse(nic_mac):
6230
            raise errors.OpPrereqError("MAC address %s already in use"
6231
                                       " in cluster" % nic_mac)
6232

    
6233
    # DISK processing
6234
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6235
      raise errors.OpPrereqError("Disk operations not supported for"
6236
                                 " diskless instances")
6237
    for disk_op, disk_dict in self.op.disks:
6238
      if disk_op == constants.DDM_REMOVE:
6239
        if len(instance.disks) == 1:
6240
          raise errors.OpPrereqError("Cannot remove the last disk of"
6241
                                     " an instance")
6242
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6243
        ins_l = ins_l[pnode]
6244
        msg = ins_l.RemoteFailMsg()
6245
        if msg:
6246
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6247
                                     (pnode, msg))
6248
        if instance.name in ins_l.payload:
6249
          raise errors.OpPrereqError("Instance is running, can't remove"
6250
                                     " disks.")
6251

    
6252
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
6256
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6257
        # an existing disk
6258
        if disk_op < 0 or disk_op >= len(instance.disks):
6259
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6260
                                     " are 0 to %d" %
6261
                                     (disk_op, len(instance.disks)))
6262

    
6263
    return
6264

    
6265
  def Exec(self, feedback_fn):
6266
    """Modifies an instance.
6267

6268
    All parameters take effect only at the next restart of the instance.
6269

6270
    """
6271
    # Process here the warnings from CheckPrereq, as we don't have a
6272
    # feedback_fn there.
6273
    for warn in self.warn:
6274
      feedback_fn("WARNING: %s" % warn)
6275

    
6276
    result = []
6277
    instance = self.instance
6278
    cluster = self.cluster
6279
    # disk changes
6280
    for disk_op, disk_dict in self.op.disks:
6281
      if disk_op == constants.DDM_REMOVE:
6282
        # remove the last disk
6283
        device = instance.disks.pop()
6284
        device_idx = len(instance.disks)
6285
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6286
          self.cfg.SetDiskID(disk, node)
6287
          msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
6288
          if msg:
6289
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6290
                            " continuing anyway", device_idx, node, msg)
6291
        result.append(("disk/%d" % device_idx, "remove"))
6292
      elif disk_op == constants.DDM_ADD:
6293
        # add a new disk
6294
        if instance.disk_template == constants.DT_FILE:
6295
          file_driver, file_path = instance.disks[0].logical_id
6296
          file_path = os.path.dirname(file_path)
6297
        else:
6298
          file_driver = file_path = None
6299
        disk_idx_base = len(instance.disks)
6300
        new_disk = _GenerateDiskTemplate(self,
6301
                                         instance.disk_template,
6302
                                         instance.name, instance.primary_node,
6303
                                         instance.secondary_nodes,
6304
                                         [disk_dict],
6305
                                         file_path,
6306
                                         file_driver,
6307
                                         disk_idx_base)[0]
6308
        instance.disks.append(new_disk)
6309
        info = _GetInstanceInfoText(instance)
6310

    
6311
        logging.info("Creating volume %s for instance %s",
6312
                     new_disk.iv_name, instance.name)
6313
        # Note: this needs to be kept in sync with _CreateDisks
6314
        #HARDCODE
6315
        for node in instance.all_nodes:
6316
          f_create = node == instance.primary_node
6317
          try:
6318
            _CreateBlockDev(self, node, instance, new_disk,
6319
                            f_create, info, f_create)
6320
          except errors.OpExecError, err:
6321
            self.LogWarning("Failed to create volume %s (%s) on"
6322
                            " node %s: %s",
6323
                            new_disk.iv_name, new_disk, node, err)
6324
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6325
                       (new_disk.size, new_disk.mode)))
6326
      else:
6327
        # change a given disk
6328
        instance.disks[disk_op].mode = disk_dict['mode']
6329
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6330
    # NIC changes
6331
    for nic_op, nic_dict in self.op.nics:
6332
      if nic_op == constants.DDM_REMOVE:
6333
        # remove the last nic
6334
        del instance.nics[-1]
6335
        result.append(("nic.%d" % len(instance.nics), "remove"))
6336
      elif nic_op == constants.DDM_ADD:
6337
        # mac and bridge should be set, by now
6338
        mac = nic_dict['mac']
6339
        ip = nic_dict.get('ip', None)
6340
        nicparams = self.nic_pinst[constants.DDM_ADD]
6341
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6342
        instance.nics.append(new_nic)
6343
        result.append(("nic.%d" % (len(instance.nics) - 1),
6344
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6345
                       (new_nic.mac, new_nic.ip,
6346
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6347
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6348
                       )))
6349
      else:
6350
        for key in 'mac', 'ip':
6351
          if key in nic_dict:
6352
            setattr(instance.nics[nic_op], key, nic_dict[key])
6353
        if nic_op in self.nic_pnew:
6354
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6355
        for key, val in nic_dict.iteritems():
6356
          result.append(("nic.%s/%d" % (key, nic_op), val))
6357

    
6358
    # hvparams changes
6359
    if self.op.hvparams:
6360
      instance.hvparams = self.hv_inst
6361
      for key, val in self.op.hvparams.iteritems():
6362
        result.append(("hv/%s" % key, val))
6363

    
6364
    # beparams changes
6365
    if self.op.beparams:
6366
      instance.beparams = self.be_inst
6367
      for key, val in self.op.beparams.iteritems():
6368
        result.append(("be/%s" % key, val))
6369

    
6370
    self.cfg.Update(instance)
6371

    
6372
    return result
6373

    
6374

    
6375
class LUQueryExports(NoHooksLU):
6376
  """Query the exports list
6377

6378
  """
6379
  _OP_REQP = ['nodes']
6380
  REQ_BGL = False
6381

    
6382
  def ExpandNames(self):
6383
    self.needed_locks = {}
6384
    self.share_locks[locking.LEVEL_NODE] = 1
6385
    if not self.op.nodes:
6386
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6387
    else:
6388
      self.needed_locks[locking.LEVEL_NODE] = \
6389
        _GetWantedNodes(self, self.op.nodes)
6390

    
6391
  def CheckPrereq(self):
6392
    """Check prerequisites.
6393

6394
    """
6395
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6396

    
6397
  def Exec(self, feedback_fn):
6398
    """Compute the list of all the exported system images.
6399

6400
    @rtype: dict
6401
    @return: a dictionary with the structure node->(export-list)
6402
        where export-list is a list of the instances exported on
6403
        that node.
6404

6405
    """
    rpcresult = self.rpc.call_export_list(self.nodes)
6407
    result = {}
6408
    for node in rpcresult:
6409
      if rpcresult[node].RemoteFailMsg():
6410
        result[node] = False
6411
      else:
6412
        result[node] = rpcresult[node].payload
6413

    
6414
    return result
6415

    
6416

    
6417
class LUExportInstance(LogicalUnit):
6418
  """Export an instance to an image in the cluster.
6419

6420
  """
6421
  HPATH = "instance-export"
6422
  HTYPE = constants.HTYPE_INSTANCE
6423
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6424
  REQ_BGL = False
6425

    
6426
  def ExpandNames(self):
6427
    self._ExpandAndLockInstance()
6428
    # FIXME: lock only instance primary and destination node
6429
    #
6430
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
6436
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6437

    
6438
  def DeclareLocks(self, level):
6439
    """Last minute lock declaration."""
6440
    # All nodes are locked anyway, so nothing to do here.
6441

    
6442
  def BuildHooksEnv(self):
6443
    """Build hooks env.
6444

6445
    This will run on the master, primary node and target node.
6446

6447
    """
6448
    env = {
6449
      "EXPORT_NODE": self.op.target_node,
6450
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6451
      }
6452
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6453
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6454
          self.op.target_node]
6455
    return env, nl, nl
6456

    
6457
  def CheckPrereq(self):
6458
    """Check prerequisites.
6459

6460
    This checks that the instance and node names are valid.
6461

6462
    """
6463
    instance_name = self.op.instance_name
6464
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6465
    assert self.instance is not None, \
6466
          "Cannot retrieve locked instance %s" % self.op.instance_name
6467
    _CheckNodeOnline(self, self.instance.primary_node)
6468

    
6469
    self.dst_node = self.cfg.GetNodeInfo(
6470
      self.cfg.ExpandNodeName(self.op.target_node))
6471

    
6472
    if self.dst_node is None:
6473
      # This is wrong node name, not a non-locked node
6474
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6475
    _CheckNodeOnline(self, self.dst_node.name)
6476
    _CheckNodeNotDrained(self, self.dst_node.name)
6477

    
6478
    # instance disk type verification
6479
    for disk in self.instance.disks:
6480
      if disk.dev_type == constants.LD_FILE:
6481
        raise errors.OpPrereqError("Export not supported for instances with"
6482
                                   " file-based disks")
6483

    
6484
  def Exec(self, feedback_fn):
6485
    """Export an instance to an image in the cluster.
6486

6487
    """
6488
    instance = self.instance
6489
    dst_node = self.dst_node
6490
    src_node = instance.primary_node
6491
    if self.op.shutdown:
6492
      # shutdown the instance, but not the disks
6493
      result = self.rpc.call_instance_shutdown(src_node, instance)
6494
      msg = result.RemoteFailMsg()
6495
      if msg:
6496
        raise errors.OpExecError("Could not shutdown instance %s on"
6497
                                 " node %s: %s" %
6498
                                 (instance.name, src_node, msg))
6499

    
6500
    vgname = self.cfg.GetVGName()
6501

    
6502
    snap_disks = []
6503

    
6504
    # set the disks ID correctly since call_instance_start needs the
6505
    # correct drbd minor to create the symlinks
6506
    for disk in instance.disks:
6507
      self.cfg.SetDiskID(disk, src_node)
6508

    
6509
    try:
6510
      for disk in instance.disks:
6511
        # result.payload will be a snapshot of an lvm leaf of the one we passed
6512
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6513
        msg = result.RemoteFailMsg()
6514
        if msg:
6515
          self.LogWarning("Could not snapshot block device %s on node %s: %s",
6516
                          disk.logical_id[1], src_node, msg)
6517
          snap_disks.append(False)
6518
        else:
6519
          disk_id = (vgname, result.payload)
6520
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6521
                                 logical_id=disk_id, physical_id=disk_id,
6522
                                 iv_name=disk.iv_name)
6523
          snap_disks.append(new_dev)
6524

    
6525
    finally:
6526
      if self.op.shutdown and instance.admin_up:
6527
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6528
        msg = result.RemoteFailMsg()
6529
        if msg:
6530
          _ShutdownInstanceDisks(self, instance)
6531
          raise errors.OpExecError("Could not start instance: %s" % msg)
6532

    
6533
    # TODO: check for size
6534

    
6535
    cluster_name = self.cfg.GetClusterName()
6536
    for idx, dev in enumerate(snap_disks):
6537
      if dev:
6538
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6539
                                               instance, cluster_name, idx)
6540
        msg = result.RemoteFailMsg()
6541
        if msg:
6542
          self.LogWarning("Could not export block device %s from node %s to"
6543
                          " node %s: %s", dev.logical_id[1], src_node,
6544
                          dst_node.name, msg)
6545
        msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
6546
        if msg:
6547
          self.LogWarning("Could not remove snapshot block device %s from node"
6548
                          " %s: %s", dev.logical_id[1], src_node, msg)
6549

    
6550
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6551
    msg = result.RemoteFailMsg()
6552
    if msg:
6553
      self.LogWarning("Could not finalize export for instance %s"
6554
                      " on node %s: %s", instance.name, dst_node.name, msg)
6555

    
6556
    nodelist = self.cfg.GetNodeList()
6557
    nodelist.remove(dst_node.name)
6558

    
6559
    # on one-node clusters nodelist will be empty after the removal
6560
    # if we proceed the backup would be removed because OpQueryExports
6561
    # substitutes an empty list with the full cluster node list.
6562
    iname = instance.name
6563
    if nodelist:
6564
      exportlist = self.rpc.call_export_list(nodelist)
6565
      for node in exportlist:
6566
        if exportlist[node].RemoteFailMsg():
6567
          continue
6568
        if iname in exportlist[node].payload:
6569
          msg = self.rpc.call_export_remove(node, iname).RemoteFailMsg()
6570
          if msg:
6571
            self.LogWarning("Could not remove older export for instance %s"
6572
                            " on node %s: %s", iname, node, msg)
6573

    
6574

    
6575
class LURemoveExport(NoHooksLU):
6576
  """Remove exports related to the named instance.
6577

6578
  """
6579
  _OP_REQP = ["instance_name"]
6580
  REQ_BGL = False
6581

    
6582
  def ExpandNames(self):
6583
    self.needed_locks = {}
6584
    # We need all nodes to be locked in order for RemoveExport to work, but we
6585
    # don't need to lock the instance itself, as nothing will happen to it (and
6586
    # we can remove exports also for a removed instance)
6587
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6588

    
6589
  def CheckPrereq(self):
6590
    """Check prerequisites.
6591
    """
6592
    pass
6593

    
6594
  def Exec(self, feedback_fn):
6595
    """Remove any export.
6596

6597
    """
6598
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6599
    # If the instance was not found we'll try with the name that was passed in.
6600
    # This will only work if it was an FQDN, though.
6601
    fqdn_warn = False
6602
    if not instance_name:
6603
      fqdn_warn = True
6604
      instance_name = self.op.instance_name
6605

    
6606
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6607
    exportlist = self.rpc.call_export_list(locked_nodes)
6608
    found = False
6609
    for node in exportlist:
6610
      msg = exportlist[node].RemoteFailMsg()
6611
      if msg:
6612
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
6613
        continue
6614
      if instance_name in exportlist[node].payload:
6615
        found = True
6616
        result = self.rpc.call_export_remove(node, instance_name)
6617
        msg = result.RemoteFailMsg()
6618
        if msg:
6619
          logging.error("Could not remove export for instance %s"
6620
                        " on node %s: %s", instance_name, node, msg)
6621

    
6622
    if fqdn_warn and not found:
6623
      feedback_fn("Export not found. If trying to remove an export belonging"
6624
                  " to a deleted instance please use its Fully Qualified"
6625
                  " Domain Name.")
6626

    
6627

    
6628
class TagsLU(NoHooksLU):
6629
  """Generic tags LU.
6630

6631
  This is an abstract class which is the parent of all the other tags LUs.
6632

6633
  """
6634

    
6635
  def ExpandNames(self):
6636
    self.needed_locks = {}
6637
    if self.op.kind == constants.TAG_NODE:
6638
      name = self.cfg.ExpandNodeName(self.op.name)
6639
      if name is None:
6640
        raise errors.OpPrereqError("Invalid node name (%s)" %
6641
                                   (self.op.name,))
6642
      self.op.name = name
6643
      self.needed_locks[locking.LEVEL_NODE] = name
6644
    elif self.op.kind == constants.TAG_INSTANCE:
6645
      name = self.cfg.ExpandInstanceName(self.op.name)
6646
      if name is None:
6647
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6648
                                   (self.op.name,))
6649
      self.op.name = name
6650
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6651

    
6652
  def CheckPrereq(self):
6653
    """Check prerequisites.
6654

6655
    """
6656
    if self.op.kind == constants.TAG_CLUSTER:
6657
      self.target = self.cfg.GetClusterInfo()
6658
    elif self.op.kind == constants.TAG_NODE:
6659
      self.target = self.cfg.GetNodeInfo(self.op.name)
6660
    elif self.op.kind == constants.TAG_INSTANCE:
6661
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6662
    else:
6663
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6664
                                 str(self.op.kind))
6665

    
6666

    
6667
class LUGetTags(TagsLU):
6668
  """Returns the tags of a given object.
6669

6670
  """
6671
  _OP_REQP = ["kind", "name"]
6672
  REQ_BGL = False
6673

    
6674
  def Exec(self, feedback_fn):
6675
    """Returns the tag list.
6676

6677
    """
6678
    return list(self.target.GetTags())
6679

    
6680

    
6681
class LUSearchTags(NoHooksLU):
6682
  """Searches the tags for a given pattern.
6683

6684
  """
6685
  _OP_REQP = ["pattern"]
6686
  REQ_BGL = False
6687

    
6688
  def ExpandNames(self):
6689
    self.needed_locks = {}
6690

    
6691
  def CheckPrereq(self):
6692
    """Check prerequisites.
6693

6694
    This checks the pattern passed for validity by compiling it.
6695

6696
    """
6697
    try:
6698
      self.re = re.compile(self.op.pattern)
6699
    except re.error, err:
6700
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6701
                                 (self.op.pattern, err))
6702

    
6703
  def Exec(self, feedback_fn):
6704
    """Returns the tag list.
6705

6706
    """
6707
    cfg = self.cfg
6708
    tgts = [("/cluster", cfg.GetClusterInfo())]
6709
    ilist = cfg.GetAllInstancesInfo().values()
6710
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6711
    nlist = cfg.GetAllNodesInfo().values()
6712
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6713
    results = []
6714
    for path, target in tgts:
6715
      for tag in target.GetTags():
6716
        if self.re.search(tag):
6717
          results.append((path, tag))
6718
    return results
6719

    
6720

    
6721
class LUAddTags(TagsLU):
6722
  """Sets a tag on a given object.
6723

6724
  """
6725
  _OP_REQP = ["kind", "name", "tags"]
6726
  REQ_BGL = False
6727

    
6728
  def CheckPrereq(self):
6729
    """Check prerequisites.
6730

6731
    This checks the type and length of the tag name and value.
6732

6733
    """
6734
    TagsLU.CheckPrereq(self)
6735
    for tag in self.op.tags:
6736
      objects.TaggableObject.ValidateTag(tag)
6737

    
6738
  def Exec(self, feedback_fn):
6739
    """Sets the tag.
6740

6741
    """
6742
    try:
6743
      for tag in self.op.tags:
6744
        self.target.AddTag(tag)
6745
    except errors.TagError, err:
6746
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6747
    try:
6748
      self.cfg.Update(self.target)
6749
    except errors.ConfigurationError:
6750
      raise errors.OpRetryError("There has been a modification to the"
6751
                                " config file and the operation has been"
6752
                                " aborted. Please retry.")
6753

    
6754

    
6755
class LUDelTags(TagsLU):
6756
  """Delete a list of tags from a given object.
6757

6758
  """
6759
  _OP_REQP = ["kind", "name", "tags"]
6760
  REQ_BGL = False
6761

    
6762
  def CheckPrereq(self):
6763
    """Check prerequisites.
6764

6765
    This checks that we have the given tag.
6766

6767
    """
6768
    TagsLU.CheckPrereq(self)
6769
    for tag in self.op.tags:
6770
      objects.TaggableObject.ValidateTag(tag)
6771
    del_tags = frozenset(self.op.tags)
6772
    cur_tags = self.target.GetTags()
6773
    if not del_tags <= cur_tags:
6774
      diff_tags = del_tags - cur_tags
6775
      diff_names = ["'%s'" % tag for tag in diff_tags]
6776
      diff_names.sort()
6777
      raise errors.OpPrereqError("Tag(s) %s not found" %
6778
                                 (",".join(diff_names)))
6779

    
6780
  def Exec(self, feedback_fn):
6781
    """Remove the tag from the object.
6782

6783
    """
6784
    for tag in self.op.tags:
6785
      self.target.RemoveTag(tag)
6786
    try:
6787
      self.cfg.Update(self.target)
6788
    except errors.ConfigurationError:
6789
      raise errors.OpRetryError("There has been a modification to the"
6790
                                " config file and the operation has been"
6791
                                " aborted. Please retry.")
6792

    
6793

    
6794
class LUTestDelay(NoHooksLU):
6795
  """Sleep for a specified amount of time.
6796

6797
  This LU sleeps on the master and/or nodes for a specified amount of
6798
  time.
6799

6800
  """
6801
  _OP_REQP = ["duration", "on_master", "on_nodes"]
6802
  REQ_BGL = False
6803

    
6804
  def ExpandNames(self):
6805
    """Expand names and set required locks.
6806

6807
    This expands the node list, if any.
6808

6809
    """
6810
    self.needed_locks = {}
6811
    if self.op.on_nodes:
6812
      # _GetWantedNodes can be used here, but is not always appropriate to use
6813
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
6814
      # more information.
6815
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
6816
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
6817

    
6818
  def CheckPrereq(self):
6819
    """Check prerequisites.
6820

6821
    """
6822

    
6823
  def Exec(self, feedback_fn):
6824
    """Do the actual sleep.
6825

6826
    """
6827
    if self.op.on_master:
6828
      if not utils.TestDelay(self.op.duration):
6829
        raise errors.OpExecError("Error during master delay test")
6830
    if self.op.on_nodes:
6831
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
6832
      if not result:
6833
        raise errors.OpExecError("Complete failure from rpc call")
6834
      for node, node_result in result.items():
6835
        node_result.Raise()
6836
        if not node_result.data:
6837
          raise errors.OpExecError("Failure during rpc call to node %s,"
6838
                                   " result: %s" % (node, node_result.data))
6839

    
6840

    
6841
class IAllocator(object):
6842
  """IAllocator framework.
6843

6844
  An IAllocator instance has three sets of attributes:
6845
    - cfg that is needed to query the cluster
6846
    - input data (all members of the _KEYS class attribute are required)
6847
    - four buffer attributes (in|out_data|text), that represent the
6848
      input (to the external script) in text and data structure format,
6849
      and the output from it, again in two formats
6850
    - the result variables from the script (success, info, nodes) for
6851
      easy usage
6852

6853
  """
6854
  _ALLO_KEYS = [
6855
    "mem_size", "disks", "disk_template",
6856
    "os", "tags", "nics", "vcpus", "hypervisor",
6857
    ]
6858
  _RELO_KEYS = [
6859
    "relocate_from",
6860
    ]
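  # Illustrative use (hypothetical names): for a relocation request one would
  # build IAllocator(lu, constants.IALLOCATOR_MODE_RELOC, "inst1.example.com",
  #                  relocate_from=["node2.example.com"]), while the _ALLO_KEYS
  # keywords above are required for IALLOCATOR_MODE_ALLOC requests.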

    
  def __init__(self, lu, mode, name, **kwargs):
6863
    self.lu = lu
6864
    # init buffer variables
6865
    self.in_text = self.out_text = self.in_data = self.out_data = None
6866
    # init all input fields so that pylint is happy
6867
    self.mode = mode
6868
    self.name = name
6869
    self.mem_size = self.disks = self.disk_template = None
6870
    self.os = self.tags = self.nics = self.vcpus = None
6871
    self.hypervisor = None
6872
    self.relocate_from = None
6873
    # computed fields
6874
    self.required_nodes = None
6875
    # init result fields
6876
    self.success = self.info = self.nodes = None
6877
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6878
      keyset = self._ALLO_KEYS
6879
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6880
      keyset = self._RELO_KEYS
6881
    else:
6882
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6883
                                   " IAllocator" % self.mode)
6884
    for key in kwargs:
6885
      if key not in keyset:
6886
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
6887
                                     " IAllocator" % key)
6888
      setattr(self, key, kwargs[key])
6889
    for key in keyset:
6890
      if key not in kwargs:
6891
        raise errors.ProgrammerError("Missing input parameter '%s' to"
6892
                                     " IAllocator" % key)
6893
    self._BuildInputData()
6894

    
6895
  def _ComputeClusterData(self):
6896
    """Compute the generic allocator input data.
6897

6898
    This is the data that is independent of the actual operation.
6899

6900
    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        msg = nresult.RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Can't get data for node %s: %s" %
                                   (nname, msg))
        msg = node_iinfo[nname].RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Can't get node instance info"
                                   " from node %s: %s" % (nname, msg))
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic node data (resource totals and instance memory usage)
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner
    data = self.in_text

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise()

    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result.data

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" % (fail, stdout+stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result