1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import sha
29
import time
30
import tempfile
31
import re
32
import platform
33
import logging
34
import copy
35
import random
36

    
37
from ganeti import ssh
38
from ganeti import utils
39
from ganeti import errors
40
from ganeti import hypervisor
41
from ganeti import locking
42
from ganeti import constants
43
from ganeti import objects
44
from ganeti import opcodes
45
from ganeti import serializer
46
from ganeti import ssconf
47

    
48

    
49
class LogicalUnit(object):
50
  """Logical Unit base class.
51

52
  Subclasses must follow these rules:
53
    - implement ExpandNames
54
    - implement CheckPrereq
55
    - implement Exec
56
    - implement BuildHooksEnv
57
    - redefine HPATH and HTYPE
58
    - optionally redefine their run requirements:
59
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
60

61
  Note that all commands require root permissions.
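
  A minimal subclass sketch (illustrative only; ``LUDoSomething`` and its
  behavior are hypothetical, but the helpers it uses exist in this module)::

    class LUDoSomething(NoHooksLU):
      _OP_REQP = ["instance_name"]

      def ExpandNames(self):
        self._ExpandAndLockInstance()

      def CheckPrereq(self):
        self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

      def Exec(self, feedback_fn):
        feedback_fn("Would act on %s" % self.instance.name)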
62

63
  """
64
  HPATH = None
65
  HTYPE = None
66
  _OP_REQP = []
67
  REQ_BGL = True
68

    
69
  def __init__(self, processor, op, context, rpc):
70
    """Constructor for LogicalUnit.
71

72
    This needs to be overridden in derived classes in order to check op
73
    validity.
74

75
    """
76
    self.proc = processor
77
    self.op = op
78
    self.cfg = context.cfg
79
    self.context = context
80
    self.rpc = rpc
81
    # Dicts used to declare locking needs to mcpu
82
    self.needed_locks = None
83
    self.acquired_locks = {}
84
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
85
    self.add_locks = {}
86
    self.remove_locks = {}
87
    # Used to force good behavior when calling helper functions
88
    self.recalculate_locks = {}
89
    self.__ssh = None
90
    # logging
91
    self.LogWarning = processor.LogWarning
92
    self.LogInfo = processor.LogInfo
93

    
94
    for attr_name in self._OP_REQP:
95
      attr_val = getattr(op, attr_name, None)
96
      if attr_val is None:
97
        raise errors.OpPrereqError("Required parameter '%s' missing" %
98
                                   attr_name)
99
    self.CheckArguments()
100

    
101
  def __GetSSH(self):
102
    """Returns the SshRunner object
103

104
    """
105
    if not self.__ssh:
106
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
107
    return self.__ssh
108

    
109
  ssh = property(fget=__GetSSH)
110

    
111
  def CheckArguments(self):
112
    """Check syntactic validity for the opcode arguments.
113

114
    This method is for doing a simple syntactic check and ensuring the
115
    validity of opcode parameters, without any cluster-related
116
    checks. While the same can be accomplished in ExpandNames and/or
117
    CheckPrereq, doing these separately is better because:
118

119
      - ExpandNames is left as purely a lock-related function
120
      - CheckPrereq is run after we have acquired locks (and possibly
121
        waited for them)
122

123
    The function is allowed to change the self.op attribute so that
124
    later methods no longer need to worry about missing parameters.
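
    Example (an illustrative sketch; ``mode`` is a hypothetical opcode
    parameter, not one defined by this module)::

      def CheckArguments(self):
        if getattr(self.op, "mode", None) not in ("one", "all"):
          raise errors.OpPrereqError("Invalid or missing 'mode' parameter")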
125

126
    """
127
    pass
128

    
129
  def ExpandNames(self):
130
    """Expand names for this LU.
131

132
    This method is called before starting to execute the opcode, and it should
133
    update all the parameters of the opcode to their canonical form (e.g. a
134
    short node name must be fully expanded after this method has successfully
135
    completed). This way locking, hooks, logging, etc. can work correctly.
136

137
    LUs which implement this method must also populate the self.needed_locks
138
    member, as a dict with lock levels as keys, and a list of needed lock names
139
    as values. Rules:
140

141
      - use an empty dict if you don't need any lock
142
      - if you don't need any lock at a particular level omit that level
143
      - don't put anything for the BGL level
144
      - if you want all locks at a level use locking.ALL_SET as a value
145

146
    If you need to share locks (rather than acquire them exclusively) at one
147
    level you can modify self.share_locks, setting a true value (usually 1) for
148
    that level. By default locks are not shared.
149

150
    Examples::
151

152
      # Acquire all nodes and one instance
153
      self.needed_locks = {
154
        locking.LEVEL_NODE: locking.ALL_SET,
155
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
156
      }
157
      # Acquire just two nodes
158
      self.needed_locks = {
159
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
160
      }
161
      # Acquire no locks
162
      self.needed_locks = {} # No, you can't leave it to the default value None
163

164
    """
165
    # The implementation of this method is mandatory only if the new LU is
166
    # concurrent, so that old LUs don't need to be changed all at the same
167
    # time.
168
    if self.REQ_BGL:
169
      self.needed_locks = {} # Exclusive LUs don't need locks.
170
    else:
171
      raise NotImplementedError
172

    
173
  def DeclareLocks(self, level):
174
    """Declare LU locking needs for a level
175

176
    While most LUs can just declare their locking needs at ExpandNames time,
177
    sometimes there's the need to calculate some locks after having acquired
178
    the ones before. This function is called just before acquiring locks at a
179
    particular level, but after acquiring the ones at lower levels, and permits
180
    such calculations. It can be used to modify self.needed_locks, and by
181
    default it does nothing.
182

183
    This function is only called if you have something already set in
184
    self.needed_locks for the level.
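
    Example (a sketch; this mirrors the pattern documented for
    _LockInstancesNodes below)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()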
185

186
    @param level: Locking level which is going to be locked
187
    @type level: member of ganeti.locking.LEVELS
188

189
    """
190

    
191
  def CheckPrereq(self):
192
    """Check prerequisites for this LU.
193

194
    This method should check that the prerequisites for the execution
195
    of this LU are fulfilled. It can do internode communication, but
196
    it should be idempotent - no cluster or system changes are
197
    allowed.
198

199
    The method should raise errors.OpPrereqError in case something is
200
    not fulfilled. Its return value is ignored.
201

202
    This method should also update all the parameters of the opcode to
203
    their canonical form if it hasn't been done by ExpandNames before.
204

205
    """
206
    raise NotImplementedError
207

    
208
  def Exec(self, feedback_fn):
209
    """Execute the LU.
210

211
    This method should implement the actual work. It should raise
212
    errors.OpExecError for failures that are somewhat dealt with in
213
    code, or expected.
214

215
    """
216
    raise NotImplementedError
217

    
218
  def BuildHooksEnv(self):
219
    """Build hooks environment for this LU.
220

221
    This method should return a three-node tuple consisting of: a dict
222
    containing the environment that will be used for running the
223
    specific hook for this LU, a list of node names on which the hook
224
    should run before the execution, and a list of node names on which
225
    the hook should run after the execution.
226

227
    The keys of the dict must not have 'GANETI_' prefixed as this will
228
    be handled in the hooks runner. Also note additional keys will be
229
    added by the hooks runner. If the LU doesn't define any
230
    environment, an empty dict (and not None) should be returned.
231

232
    No nodes should be returned as an empty list (and not None).
233

234
    Note that if the HPATH for a LU class is None, this function will
235
    not be called.
236

237
    """
238
    raise NotImplementedError
239

    
240
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
241
    """Notify the LU about the results of its hooks.
242

243
    This method is called every time a hooks phase is executed, and notifies
244
    the Logical Unit about the hooks' result. The LU can then use it to alter
245
    its result based on the hooks.  By default the method does nothing and the
246
    previous result is passed back unchanged but any LU can define it if it
247
    wants to use the local cluster hook-scripts somehow.
248

249
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
250
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
251
    @param hook_results: the results of the multi-node hooks rpc call
252
    @param feedback_fn: function used send feedback back to the caller
253
    @param lu_result: the previous Exec result this LU had, or None
254
        in the PRE phase
255
    @return: the new Exec result, based on the previous result
256
        and hook results
257

258
    """
259
    return lu_result
260

    
261
  def _ExpandAndLockInstance(self):
262
    """Helper function to expand and lock an instance.
263

264
    Many LUs that work on an instance take its name in self.op.instance_name
265
    and need to expand it and then declare the expanded name for locking. This
266
    function does it, and then updates self.op.instance_name to the expanded
267
    name. It also initializes needed_locks as a dict, if this hasn't been done
268
    before.
269

270
    """
271
    if self.needed_locks is None:
272
      self.needed_locks = {}
273
    else:
274
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
275
        "_ExpandAndLockInstance called with instance-level locks set"
276
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
277
    if expanded_name is None:
278
      raise errors.OpPrereqError("Instance '%s' not known" %
279
                                  self.op.instance_name)
280
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
281
    self.op.instance_name = expanded_name
282

    
283
  def _LockInstancesNodes(self, primary_only=False):
284
    """Helper function to declare instances' nodes for locking.
285

286
    This function should be called after locking one or more instances to lock
287
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
288
    with all primary or secondary nodes for instances already locked and
289
    present in self.needed_locks[locking.LEVEL_INSTANCE].
290

291
    It should be called from DeclareLocks, and for safety only works if
292
    self.recalculate_locks[locking.LEVEL_NODE] is set.
293

294
    In the future it may grow parameters to just lock some instance's nodes, or
295
    to just lock primaries or secondary nodes, if needed.
296

297
    If should be called in DeclareLocks in a way similar to::
298

299
      if level == locking.LEVEL_NODE:
300
        self._LockInstancesNodes()
301

302
    @type primary_only: boolean
303
    @param primary_only: only lock primary nodes of locked instances
304

305
    """
306
    assert locking.LEVEL_NODE in self.recalculate_locks, \
307
      "_LockInstancesNodes helper function called with no nodes to recalculate"
308

    
309
    # TODO: check if we're really been called with the instance locks held
310

    
311
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
312
    # future we might want to have different behaviors depending on the value
313
    # of self.recalculate_locks[locking.LEVEL_NODE]
314
    wanted_nodes = []
315
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
316
      instance = self.context.cfg.GetInstanceInfo(instance_name)
317
      wanted_nodes.append(instance.primary_node)
318
      if not primary_only:
319
        wanted_nodes.extend(instance.secondary_nodes)
320

    
321
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
322
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
323
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
324
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
325

    
326
    del self.recalculate_locks[locking.LEVEL_NODE]
327

    
328

    
329
class NoHooksLU(LogicalUnit):
330
  """Simple LU which runs no hooks.
331

332
  This LU is intended as a parent for other LogicalUnits which will
333
  run no hooks, in order to reduce duplicate code.
334

335
  """
336
  HPATH = None
337
  HTYPE = None
338

    
339

    
340
def _GetWantedNodes(lu, nodes):
341
  """Returns list of checked and expanded node names.
342

343
  @type lu: L{LogicalUnit}
344
  @param lu: the logical unit on whose behalf we execute
345
  @type nodes: list
346
  @param nodes: list of node names or None for all nodes
347
  @rtype: list
348
  @return: the list of nodes, sorted
349
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type
350

351
  """
352
  if not isinstance(nodes, list):
353
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
354

    
355
  if not nodes:
356
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
357
      " non-empty list of nodes whose name is to be expanded.")
358

    
359
  wanted = []
360
  for name in nodes:
361
    node = lu.cfg.ExpandNodeName(name)
362
    if node is None:
363
      raise errors.OpPrereqError("No such node name '%s'" % name)
364
    wanted.append(node)
365

    
366
  return utils.NiceSort(wanted)
367

    
368

    
369
def _GetWantedInstances(lu, instances):
370
  """Returns list of checked and expanded instance names.
371

372
  @type lu: L{LogicalUnit}
373
  @param lu: the logical unit on whose behalf we execute
374
  @type instances: list
375
  @param instances: list of instance names or None for all instances
376
  @rtype: list
377
  @return: the list of instances, sorted
378
  @raise errors.OpPrereqError: if the instances parameter is wrong type
379
  @raise errors.OpPrereqError: if any of the passed instances is not found
380

381
  """
382
  if not isinstance(instances, list):
383
    raise errors.OpPrereqError("Invalid argument type 'instances'")
384

    
385
  if instances:
386
    wanted = []
387

    
388
    for name in instances:
389
      instance = lu.cfg.ExpandInstanceName(name)
390
      if instance is None:
391
        raise errors.OpPrereqError("No such instance name '%s'" % name)
392
      wanted.append(instance)
393

    
394
  else:
395
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
396
  return wanted
397

    
398

    
399
def _CheckOutputFields(static, dynamic, selected):
400
  """Checks whether all selected fields are valid.
401

402
  @type static: L{utils.FieldSet}
403
  @param static: static fields set
404
  @type dynamic: L{utils.FieldSet}
405
  @param dynamic: dynamic fields set
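
  Example (a sketch; field names and the ``op`` object are illustrative)::

    _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
                       dynamic=utils.FieldSet("dtotal", "dfree"),
                       selected=op.output_fields)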
406

407
  """
408
  f = utils.FieldSet()
409
  f.Extend(static)
410
  f.Extend(dynamic)
411

    
412
  delta = f.NonMatching(selected)
413
  if delta:
414
    raise errors.OpPrereqError("Unknown output fields selected: %s"
415
                               % ",".join(delta))
416

    
417

    
418
def _CheckBooleanOpField(op, name):
419
  """Validates boolean opcode parameters.
420

421
  This will ensure that an opcode parameter is either a boolean value,
422
  or None (but that it always exists).
423

424
  """
425
  val = getattr(op, name, None)
426
  if not (val is None or isinstance(val, bool)):
427
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
428
                               (name, str(val)))
429
  setattr(op, name, val)
430

    
431

    
432
def _CheckNodeOnline(lu, node):
433
  """Ensure that a given node is online.
434

435
  @param lu: the LU on behalf of which we make the check
436
  @param node: the node to check
437
  @raise errors.OpPrereqError: if the node is offline
438

439
  """
440
  if lu.cfg.GetNodeInfo(node).offline:
441
    raise errors.OpPrereqError("Can't use offline node %s" % node)
442

    
443

    
444
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
445
                          memory, vcpus, nics):
446
  """Builds instance related env variables for hooks
447

448
  This builds the hook environment from individual variables.
449

450
  @type name: string
451
  @param name: the name of the instance
452
  @type primary_node: string
453
  @param primary_node: the name of the instance's primary node
454
  @type secondary_nodes: list
455
  @param secondary_nodes: list of secondary nodes as strings
456
  @type os_type: string
457
  @param os_type: the name of the instance's OS
458
  @type status: boolean
459
  @param status: the should_run status of the instance
460
  @type memory: string
461
  @param memory: the memory size of the instance
462
  @type vcpus: string
463
  @param vcpus: the count of VCPUs the instance has
464
  @type nics: list
465
  @param nics: list of tuples (ip, bridge, mac) representing
466
      the NICs the instance has
467
  @rtype: dict
468
  @return: the hook environment for this instance
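
  Example (illustrative values only)::

    nics = [("198.51.100.10", "xen-br0", "aa:00:00:00:00:01")]
    env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
                                "debian-etch", True, 128, 1, nics)
    # now env["INSTANCE_NIC_COUNT"] == 1
    # and env["INSTANCE_NIC0_BRIDGE"] == "xen-br0"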
469

470
  """
471
  if status:
472
    str_status = "up"
473
  else:
474
    str_status = "down"
475
  env = {
476
    "OP_TARGET": name,
477
    "INSTANCE_NAME": name,
478
    "INSTANCE_PRIMARY": primary_node,
479
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
480
    "INSTANCE_OS_TYPE": os_type,
481
    "INSTANCE_STATUS": str_status,
482
    "INSTANCE_MEMORY": memory,
483
    "INSTANCE_VCPUS": vcpus,
484
  }
485

    
486
  if nics:
487
    nic_count = len(nics)
488
    for idx, (ip, bridge, mac) in enumerate(nics):
489
      if ip is None:
490
        ip = ""
491
      env["INSTANCE_NIC%d_IP" % idx] = ip
492
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
493
      env["INSTANCE_NIC%d_HWADDR" % idx] = mac
494
  else:
495
    nic_count = 0
496

    
497
  env["INSTANCE_NIC_COUNT"] = nic_count
498

    
499
  return env
500

    
501

    
502
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
503
  """Builds instance related env variables for hooks from an object.
504

505
  @type lu: L{LogicalUnit}
506
  @param lu: the logical unit on whose behalf we execute
507
  @type instance: L{objects.Instance}
508
  @param instance: the instance for which we should build the
509
      environment
510
  @type override: dict
511
  @param override: dictionary with key/values that will override
512
      our values
513
  @rtype: dict
514
  @return: the hook environment dictionary
515

516
  """
517
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
518
  args = {
519
    'name': instance.name,
520
    'primary_node': instance.primary_node,
521
    'secondary_nodes': instance.secondary_nodes,
522
    'os_type': instance.os,
523
    'status': instance.admin_up,
524
    'memory': bep[constants.BE_MEMORY],
525
    'vcpus': bep[constants.BE_VCPUS],
526
    'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
527
  }
528
  if override:
529
    args.update(override)
530
  return _BuildInstanceHookEnv(**args)
531

    
532

    
533
def _AdjustCandidatePool(lu):
534
  """Adjust the candidate pool after node operations.
535

536
  """
537
  mod_list = lu.cfg.MaintainCandidatePool()
538
  if mod_list:
539
    lu.LogInfo("Promoted nodes to master candidate role: %s",
540
               ", ".join(node.name for node in mod_list))
541
    for name in mod_list:
542
      lu.context.ReaddNode(name)
543
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
544
  if mc_now > mc_max:
545
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
546
               (mc_now, mc_max))
547

    
548

    
549
def _CheckInstanceBridgesExist(lu, instance):
550
  """Check that the brigdes needed by an instance exist.
551

552
  """
553
  # check bridges existance
554
  brlist = [nic.bridge for nic in instance.nics]
555
  result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
556
  result.Raise()
557
  if not result.data:
558
    raise errors.OpPrereqError("One or more target bridges %s does not"
559
                               " exist on destination node '%s'" %
560
                               (brlist, instance.primary_node))
561

    
562

    
563
class LUDestroyCluster(NoHooksLU):
564
  """Logical unit for destroying the cluster.
565

566
  """
567
  _OP_REQP = []
568

    
569
  def CheckPrereq(self):
570
    """Check prerequisites.
571

572
    This checks whether the cluster is empty.
573

574
    Any errors are signalled by raising errors.OpPrereqError.
575

576
    """
577
    master = self.cfg.GetMasterNode()
578

    
579
    nodelist = self.cfg.GetNodeList()
580
    if len(nodelist) != 1 or nodelist[0] != master:
581
      raise errors.OpPrereqError("There are still %d node(s) in"
582
                                 " this cluster." % (len(nodelist) - 1))
583
    instancelist = self.cfg.GetInstanceList()
584
    if instancelist:
585
      raise errors.OpPrereqError("There are still %d instance(s) in"
586
                                 " this cluster." % len(instancelist))
587

    
588
  def Exec(self, feedback_fn):
589
    """Destroys the cluster.
590

591
    """
592
    master = self.cfg.GetMasterNode()
593
    result = self.rpc.call_node_stop_master(master, False)
594
    result.Raise()
595
    if not result.data:
596
      raise errors.OpExecError("Could not disable the master role")
597
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
598
    utils.CreateBackup(priv_key)
599
    utils.CreateBackup(pub_key)
600
    return master
601

    
602

    
603
class LUVerifyCluster(LogicalUnit):
604
  """Verifies the cluster status.
605

606
  """
607
  HPATH = "cluster-verify"
608
  HTYPE = constants.HTYPE_CLUSTER
609
  _OP_REQP = ["skip_checks"]
610
  REQ_BGL = False
611

    
612
  def ExpandNames(self):
613
    self.needed_locks = {
614
      locking.LEVEL_NODE: locking.ALL_SET,
615
      locking.LEVEL_INSTANCE: locking.ALL_SET,
616
    }
617
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
618

    
619
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
620
                  node_result, feedback_fn, master_files,
621
                  drbd_map):
622
    """Run multiple tests against a node.
623

624
    Test list:
625

626
      - compares ganeti version
627
      - checks vg existance and size > 20G
628
      - checks config file checksum
629
      - checks ssh to other nodes
630

631
    @type nodeinfo: L{objects.Node}
632
    @param nodeinfo: the node to check
633
    @param file_list: required list of files
634
    @param local_cksum: dictionary of local files and their checksums
635
    @param node_result: the results from the node
636
    @param feedback_fn: function used to accumulate results
637
    @param master_files: list of files that only masters should have
638
    @param drbd_map: the useddrbd minors for this node, in
639
        form of minor: (instance, must_exist) which correspond to instances
640
        and their running status
641

642
    """
643
    node = nodeinfo.name
644

    
645
    # main result, node_result should be a non-empty dict
646
    if not node_result or not isinstance(node_result, dict):
647
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
648
      return True
649

    
650
    # compares ganeti version
651
    local_version = constants.PROTOCOL_VERSION
652
    remote_version = node_result.get('version', None)
653
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
654
            len(remote_version) == 2):
655
      feedback_fn("  - ERROR: connection to %s failed" % (node))
656
      return True
657

    
658
    if local_version != remote_version[0]:
659
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
660
                  " node %s %s" % (local_version, node, remote_version[0]))
661
      return True
662

    
663
    # node seems compatible, we can actually try to look into its results
664

    
665
    bad = False
666

    
667
    # full package version
668
    if constants.RELEASE_VERSION != remote_version[1]:
669
      feedback_fn("  - WARNING: software version mismatch: master %s,"
670
                  " node %s %s" %
671
                  (constants.RELEASE_VERSION, node, remote_version[1]))
672

    
673
    # checks vg existence and size > 20G
674

    
675
    vglist = node_result.get(constants.NV_VGLIST, None)
676
    if not vglist:
677
      feedback_fn("  - ERROR: unable to check volume groups on node %s." %
678
                      (node,))
679
      bad = True
680
    else:
681
      vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
682
                                            constants.MIN_VG_SIZE)
683
      if vgstatus:
684
        feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
685
        bad = True
686

    
687
    # checks config file checksum
688

    
689
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
690
    if not isinstance(remote_cksum, dict):
691
      bad = True
692
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
693
    else:
694
      for file_name in file_list:
695
        node_is_mc = nodeinfo.master_candidate
696
        must_have_file = file_name not in master_files
697
        if file_name not in remote_cksum:
698
          if node_is_mc or must_have_file:
699
            bad = True
700
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
701
        elif remote_cksum[file_name] != local_cksum[file_name]:
702
          if node_is_mc or must_have_file:
703
            bad = True
704
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
705
          else:
706
            # not candidate and this is not a must-have file
707
            bad = True
708
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
709
                        " '%s'" % file_name)
710
        else:
711
          # all good, except non-master/non-must have combination
712
          if not node_is_mc and not must_have_file:
713
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
714
                        " candidates" % file_name)
715

    
716
    # checks ssh to any
717

    
718
    if constants.NV_NODELIST not in node_result:
719
      bad = True
720
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
721
    else:
722
      if node_result[constants.NV_NODELIST]:
723
        bad = True
724
        for node in node_result[constants.NV_NODELIST]:
725
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
726
                          (node, node_result[constants.NV_NODELIST][node]))
727

    
728
    if constants.NV_NODENETTEST not in node_result:
729
      bad = True
730
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
731
    else:
732
      if node_result[constants.NV_NODENETTEST]:
733
        bad = True
734
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
735
        for node in nlist:
736
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
737
                          (node, node_result[constants.NV_NODENETTEST][node]))
738

    
739
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
740
    if isinstance(hyp_result, dict):
741
      for hv_name, hv_result in hyp_result.iteritems():
742
        if hv_result is not None:
743
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
744
                      (hv_name, hv_result))
745

    
746
    # check used drbd list
747
    used_minors = node_result.get(constants.NV_DRBDLIST, [])
748
    for minor, (iname, must_exist) in drbd_map.items():
749
      if minor not in used_minors and must_exist:
750
        feedback_fn("  - ERROR: drbd minor %d of instance %s is not active" %
751
                    (minor, iname))
752
        bad = True
753
    for minor in used_minors:
754
      if minor not in drbd_map:
755
        feedback_fn("  - ERROR: unallocated drbd minor %d is in use" % minor)
756
        bad = True
757

    
758
    return bad
759

    
760
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
761
                      node_instance, feedback_fn, n_offline):
762
    """Verify an instance.
763

764
    This function checks to see if the required block devices are
765
    available on the instance's node.
766

767
    """
768
    bad = False
769

    
770
    node_current = instanceconfig.primary_node
771

    
772
    node_vol_should = {}
773
    instanceconfig.MapLVsByNode(node_vol_should)
774

    
775
    for node in node_vol_should:
776
      if node in n_offline:
777
        # ignore missing volumes on offline nodes
778
        continue
779
      for volume in node_vol_should[node]:
780
        if node not in node_vol_is or volume not in node_vol_is[node]:
781
          feedback_fn("  - ERROR: volume %s missing on node %s" %
782
                          (volume, node))
783
          bad = True
784

    
785
    if instanceconfig.admin_up:
786
      if ((node_current not in node_instance or
787
          not instance in node_instance[node_current]) and
788
          node_current not in n_offline):
789
        feedback_fn("  - ERROR: instance %s not running on node %s" %
790
                        (instance, node_current))
791
        bad = True
792

    
793
    for node in node_instance:
794
      if (not node == node_current):
795
        if instance in node_instance[node]:
796
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
797
                          (instance, node))
798
          bad = True
799

    
800
    return bad
801

    
802
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
803
    """Verify if there are any unknown volumes in the cluster.
804

805
    The .os, .swap and backup volumes are ignored. All other volumes are
806
    reported as unknown.
807

808
    """
809
    bad = False
810

    
811
    for node in node_vol_is:
812
      for volume in node_vol_is[node]:
813
        if node not in node_vol_should or volume not in node_vol_should[node]:
814
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
815
                      (volume, node))
816
          bad = True
817
    return bad
818

    
819
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
820
    """Verify the list of running instances.
821

822
    This checks what instances are running but unknown to the cluster.
823

824
    """
825
    bad = False
826
    for node in node_instance:
827
      for runninginstance in node_instance[node]:
828
        if runninginstance not in instancelist:
829
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
830
                          (runninginstance, node))
831
          bad = True
832
    return bad
833

    
834
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
835
    """Verify N+1 Memory Resilience.
836

837
    Check that if one single node dies we can still start all the instances it
838
    was primary for.
839

840
    """
841
    bad = False
842

    
843
    for node, nodeinfo in node_info.iteritems():
844
      # This code checks that every node which is now listed as secondary has
845
      # enough memory to host all instances it is supposed to, should a single
846
      # other node in the cluster fail.
847
      # FIXME: not ready for failover to an arbitrary node
848
      # FIXME: does not support file-backed instances
849
      # WARNING: we currently take into account down instances as well as up
850
      # ones, considering that even if they're down someone might want to start
851
      # them even in the event of a node failure.
852
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
853
        needed_mem = 0
854
        for instance in instances:
855
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
856
          if bep[constants.BE_AUTO_BALANCE]:
857
            needed_mem += bep[constants.BE_MEMORY]
858
        if nodeinfo['mfree'] < needed_mem:
859
          feedback_fn("  - ERROR: not enough memory on node %s to accomodate"
860
                      " failovers should node %s fail" % (node, prinode))
861
          bad = True
862
    return bad
863

    
864
  def CheckPrereq(self):
865
    """Check prerequisites.
866

867
    Transform the list of checks we're going to skip into a set and check that
868
    all its members are valid.
869

870
    """
871
    self.skip_set = frozenset(self.op.skip_checks)
872
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
873
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
874

    
875
  def BuildHooksEnv(self):
876
    """Build hooks env.
877

878
    Cluster-Verify hooks are run only in the post phase; if they fail, their
879
    output is logged in the verify output and the verification fails.
880

881
    """
882
    all_nodes = self.cfg.GetNodeList()
883
    # TODO: populate the environment with useful information for verify hooks
884
    env = {}
885
    return env, [], all_nodes
886

    
887
  def Exec(self, feedback_fn):
888
    """Verify integrity of cluster, performing various test on nodes.
889

890
    """
891
    bad = False
892
    feedback_fn("* Verifying global settings")
893
    for msg in self.cfg.VerifyConfig():
894
      feedback_fn("  - ERROR: %s" % msg)
895

    
896
    vg_name = self.cfg.GetVGName()
897
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
898
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
899
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
900
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
901
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
902
                        for iname in instancelist)
903
    i_non_redundant = [] # Non redundant instances
904
    i_non_a_balanced = [] # Non auto-balanced instances
905
    n_offline = [] # List of offline nodes
906
    node_volume = {}
907
    node_instance = {}
908
    node_info = {}
909
    instance_cfg = {}
910

    
911
    # FIXME: verify OS list
912
    # do local checksums
913
    master_files = [constants.CLUSTER_CONF_FILE]
914

    
915
    file_names = ssconf.SimpleStore().GetFileList()
916
    file_names.append(constants.SSL_CERT_FILE)
917
    file_names.append(constants.RAPI_CERT_FILE)
918
    file_names.extend(master_files)
919

    
920
    local_checksums = utils.FingerprintFiles(file_names)
921

    
922
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
923
    node_verify_param = {
924
      constants.NV_FILELIST: file_names,
925
      constants.NV_NODELIST: [node.name for node in nodeinfo
926
                              if not node.offline],
927
      constants.NV_HYPERVISOR: hypervisors,
928
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
929
                                  node.secondary_ip) for node in nodeinfo
930
                                 if not node.offline],
931
      constants.NV_LVLIST: vg_name,
932
      constants.NV_INSTANCELIST: hypervisors,
933
      constants.NV_VGLIST: None,
934
      constants.NV_VERSION: None,
935
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
936
      constants.NV_DRBDLIST: None,
937
      }
938
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
939
                                           self.cfg.GetClusterName())
940

    
941
    cluster = self.cfg.GetClusterInfo()
942
    master_node = self.cfg.GetMasterNode()
943
    all_drbd_map = self.cfg.ComputeDRBDMap()
944

    
945
    for node_i in nodeinfo:
946
      node = node_i.name
947
      nresult = all_nvinfo[node].data
948

    
949
      if node_i.offline:
950
        feedback_fn("* Skipping offline node %s" % (node,))
951
        n_offline.append(node)
952
        continue
953

    
954
      if node == master_node:
955
        ntype = "master"
956
      elif node_i.master_candidate:
957
        ntype = "master candidate"
958
      else:
959
        ntype = "regular"
960
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
961

    
962
      if all_nvinfo[node].failed or not isinstance(nresult, dict):
963
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
964
        bad = True
965
        continue
966

    
967
      node_drbd = {}
968
      for minor, instance in all_drbd_map[node].items():
969
        instance = instanceinfo[instance]
970
        node_drbd[minor] = (instance.name, instance.admin_up)
971
      result = self._VerifyNode(node_i, file_names, local_checksums,
972
                                nresult, feedback_fn, master_files,
973
                                node_drbd)
974
      bad = bad or result
975

    
976
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
977
      if isinstance(lvdata, basestring):
978
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
979
                    (node, utils.SafeEncode(lvdata)))
980
        bad = True
981
        node_volume[node] = {}
982
      elif not isinstance(lvdata, dict):
983
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
984
        bad = True
985
        continue
986
      else:
987
        node_volume[node] = lvdata
988

    
989
      # node_instance
990
      idata = nresult.get(constants.NV_INSTANCELIST, None)
991
      if not isinstance(idata, list):
992
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
993
                    (node,))
994
        bad = True
995
        continue
996

    
997
      node_instance[node] = idata
998

    
999
      # node_info
1000
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1001
      if not isinstance(nodeinfo, dict):
1002
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1003
        bad = True
1004
        continue
1005

    
1006
      try:
1007
        node_info[node] = {
1008
          "mfree": int(nodeinfo['memory_free']),
1009
          "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
1010
          "pinst": [],
1011
          "sinst": [],
1012
          # dictionary holding all instances this node is secondary for,
1013
          # grouped by their primary node. Each key is a cluster node, and each
1014
          # value is a list of instances which have the key as primary and the
1015
          # current node as secondary.  this is handy to calculate N+1 memory
1016
          # availability if you can only failover from a primary to its
1017
          # secondary.
1018
          "sinst-by-pnode": {},
1019
        }
1020
      except ValueError:
1021
        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
1022
        bad = True
1023
        continue
1024

    
1025
    node_vol_should = {}
1026

    
1027
    for instance in instancelist:
1028
      feedback_fn("* Verifying instance %s" % instance)
1029
      inst_config = instanceinfo[instance]
1030
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1031
                                     node_instance, feedback_fn, n_offline)
1032
      bad = bad or result
1033
      inst_nodes_offline = []
1034

    
1035
      inst_config.MapLVsByNode(node_vol_should)
1036

    
1037
      instance_cfg[instance] = inst_config
1038

    
1039
      pnode = inst_config.primary_node
1040
      if pnode in node_info:
1041
        node_info[pnode]['pinst'].append(instance)
1042
      elif pnode not in n_offline:
1043
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1044
                    " %s failed" % (instance, pnode))
1045
        bad = True
1046

    
1047
      if pnode in n_offline:
1048
        inst_nodes_offline.append(pnode)
1049

    
1050
      # If the instance is non-redundant we cannot survive losing its primary
1051
      # node, so we are not N+1 compliant. On the other hand we have no disk
1052
      # templates with more than one secondary so that situation is not well
1053
      # supported either.
1054
      # FIXME: does not support file-backed instances
1055
      if len(inst_config.secondary_nodes) == 0:
1056
        i_non_redundant.append(instance)
1057
      elif len(inst_config.secondary_nodes) > 1:
1058
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1059
                    % instance)
1060

    
1061
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1062
        i_non_a_balanced.append(instance)
1063

    
1064
      for snode in inst_config.secondary_nodes:
1065
        if snode in node_info:
1066
          node_info[snode]['sinst'].append(instance)
1067
          if pnode not in node_info[snode]['sinst-by-pnode']:
1068
            node_info[snode]['sinst-by-pnode'][pnode] = []
1069
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1070
        elif snode not in n_offline:
1071
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1072
                      " %s failed" % (instance, snode))
1073
          bad = True
1074
        if snode in n_offline:
1075
          inst_nodes_offline.append(snode)
1076

    
1077
      if inst_nodes_offline:
1078
        # warn that the instance lives on offline nodes, and set bad=True
1079
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1080
                    ", ".join(inst_nodes_offline))
1081
        bad = True
1082

    
1083
    feedback_fn("* Verifying orphan volumes")
1084
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1085
                                       feedback_fn)
1086
    bad = bad or result
1087

    
1088
    feedback_fn("* Verifying remaining instances")
1089
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1090
                                         feedback_fn)
1091
    bad = bad or result
1092

    
1093
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1094
      feedback_fn("* Verifying N+1 Memory redundancy")
1095
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1096
      bad = bad or result
1097

    
1098
    feedback_fn("* Other Notes")
1099
    if i_non_redundant:
1100
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1101
                  % len(i_non_redundant))
1102

    
1103
    if i_non_a_balanced:
1104
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1105
                  % len(i_non_a_balanced))
1106

    
1107
    if n_offline:
1108
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1109

    
1110
    return not bad
1111

    
1112
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1113
    """Analize the post-hooks' result
1114

1115
    This method analyses the hook result, handles it, and sends some
1116
    nicely-formatted feedback back to the user.
1117

1118
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1119
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1120
    @param hooks_results: the results of the multi-node hooks rpc call
1121
    @param feedback_fn: function used to send feedback back to the caller
1122
    @param lu_result: previous Exec result
1123
    @return: the new Exec result, based on the previous result
1124
        and hook results
1125

1126
    """
1127
    # We only really run POST phase hooks, and are only interested in
1128
    # their results
1129
    if phase == constants.HOOKS_PHASE_POST:
1130
      # Used to change hooks' output to proper indentation
1131
      indent_re = re.compile('^', re.M)
1132
      feedback_fn("* Hooks Results")
1133
      if not hooks_results:
1134
        feedback_fn("  - ERROR: general communication failure")
1135
        lu_result = 1
1136
      else:
1137
        for node_name in hooks_results:
1138
          show_node_header = True
1139
          res = hooks_results[node_name]
1140
          if res.failed or res.data is False or not isinstance(res.data, list):
1141
            if res.offline:
1142
              # no need to warn or set fail return value
1143
              continue
1144
            feedback_fn("    Communication failure in hooks execution")
1145
            lu_result = 1
1146
            continue
1147
          for script, hkr, output in res.data:
1148
            if hkr == constants.HKR_FAIL:
1149
              # The node header is only shown once, if there are
1150
              # failing hooks on that node
1151
              if show_node_header:
1152
                feedback_fn("  Node %s:" % node_name)
1153
                show_node_header = False
1154
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1155
              output = indent_re.sub('      ', output)
1156
              feedback_fn("%s" % output)
1157
              lu_result = 1
1158

    
1159
      return lu_result
1160

    
1161

    
1162
class LUVerifyDisks(NoHooksLU):
1163
  """Verifies the cluster disks status.
1164

1165
  """
1166
  _OP_REQP = []
1167
  REQ_BGL = False
1168

    
1169
  def ExpandNames(self):
1170
    self.needed_locks = {
1171
      locking.LEVEL_NODE: locking.ALL_SET,
1172
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1173
    }
1174
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1175

    
1176
  def CheckPrereq(self):
1177
    """Check prerequisites.
1178

1179
    This has no prerequisites.
1180

1181
    """
1182
    pass
1183

    
1184
  def Exec(self, feedback_fn):
1185
    """Verify integrity of cluster disks.
1186

1187
    """
1188
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
1189

    
1190
    vg_name = self.cfg.GetVGName()
1191
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1192
    instances = [self.cfg.GetInstanceInfo(name)
1193
                 for name in self.cfg.GetInstanceList()]
1194

    
1195
    nv_dict = {}
1196
    for inst in instances:
1197
      inst_lvs = {}
1198
      if (not inst.admin_up or
1199
          inst.disk_template not in constants.DTS_NET_MIRROR):
1200
        continue
1201
      inst.MapLVsByNode(inst_lvs)
1202
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
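      # e.g. (illustrative) {"inst1": {"node1": ["lv1", "lv2"]}} yields
      # {("node1", "lv1"): inst1, ("node1", "lv2"): inst1}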
1203
      for node, vol_list in inst_lvs.iteritems():
1204
        for vol in vol_list:
1205
          nv_dict[(node, vol)] = inst
1206

    
1207
    if not nv_dict:
1208
      return result
1209

    
1210
    node_lvs = self.rpc.call_volume_list(nodes, vg_name)
1211

    
1212
    to_act = set()
1213
    for node in nodes:
1214
      # node_volume
1215
      lvs = node_lvs[node]
1216
      if lvs.failed:
1217
        if not lvs.offline:
1218
          self.LogWarning("Connection to node %s failed: %s" %
1219
                          (node, lvs.data))
1220
        continue
1221
      lvs = lvs.data
1222
      if isinstance(lvs, basestring):
1223
        logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
1224
        res_nlvm[node] = lvs
1225
      elif not isinstance(lvs, dict):
1226
        logging.warning("Connection to node %s failed or invalid data"
1227
                        " returned", node)
1228
        res_nodes.append(node)
1229
        continue
1230

    
1231
      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
1232
        inst = nv_dict.pop((node, lv_name), None)
1233
        if (not lv_online and inst is not None
1234
            and inst.name not in res_instances):
1235
          res_instances.append(inst.name)
1236

    
1237
    # any leftover items in nv_dict are missing LVs, let's arrange the
1238
    # data better
1239
    for key, inst in nv_dict.iteritems():
1240
      if inst.name not in res_missing:
1241
        res_missing[inst.name] = []
1242
      res_missing[inst.name].append(key)
1243

    
1244
    return result
1245

    
1246

    
1247
class LURenameCluster(LogicalUnit):
1248
  """Rename the cluster.
1249

1250
  """
1251
  HPATH = "cluster-rename"
1252
  HTYPE = constants.HTYPE_CLUSTER
1253
  _OP_REQP = ["name"]
1254

    
1255
  def BuildHooksEnv(self):
1256
    """Build hooks env.
1257

1258
    """
1259
    env = {
1260
      "OP_TARGET": self.cfg.GetClusterName(),
1261
      "NEW_NAME": self.op.name,
1262
      }
1263
    mn = self.cfg.GetMasterNode()
1264
    return env, [mn], [mn]
1265

    
1266
  def CheckPrereq(self):
1267
    """Verify that the passed name is a valid one.
1268

1269
    """
1270
    hostname = utils.HostInfo(self.op.name)
1271

    
1272
    new_name = hostname.name
1273
    self.ip = new_ip = hostname.ip
1274
    old_name = self.cfg.GetClusterName()
1275
    old_ip = self.cfg.GetMasterIP()
1276
    if new_name == old_name and new_ip == old_ip:
1277
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1278
                                 " cluster has changed")
1279
    if new_ip != old_ip:
1280
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1281
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1282
                                   " reachable on the network. Aborting." %
1283
                                   new_ip)
1284

    
1285
    self.op.name = new_name
1286

    
1287
  def Exec(self, feedback_fn):
1288
    """Rename the cluster.
1289

1290
    """
1291
    clustername = self.op.name
1292
    ip = self.ip
1293

    
1294
    # shutdown the master IP
1295
    master = self.cfg.GetMasterNode()
1296
    result = self.rpc.call_node_stop_master(master, False)
1297
    if result.failed or not result.data:
1298
      raise errors.OpExecError("Could not disable the master role")
1299

    
1300
    try:
1301
      cluster = self.cfg.GetClusterInfo()
1302
      cluster.cluster_name = clustername
1303
      cluster.master_ip = ip
1304
      self.cfg.Update(cluster)
1305

    
1306
      # update the known hosts file
1307
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1308
      node_list = self.cfg.GetNodeList()
1309
      try:
1310
        node_list.remove(master)
1311
      except ValueError:
1312
        pass
1313
      result = self.rpc.call_upload_file(node_list,
1314
                                         constants.SSH_KNOWN_HOSTS_FILE)
1315
      for to_node, to_result in result.iteritems():
1316
        if to_result.failed or not to_result.data:
1317
          logging.error("Copy of file %s to node %s failed",
1318
                        constants.SSH_KNOWN_HOSTS_FILE, to_node)
1319

    
1320
    finally:
1321
      result = self.rpc.call_node_start_master(master, False)
1322
      if result.failed or not result.data:
1323
        self.LogWarning("Could not re-enable the master role on"
1324
                        " the master, please restart manually.")
1325

    
1326

    
1327
def _RecursiveCheckIfLVMBased(disk):
1328
  """Check if the given disk or its children are lvm-based.
1329

1330
  @type disk: L{objects.Disk}
1331
  @param disk: the disk to check
1332
  @rtype: boolean
1333
  @return: boolean indicating whether a LD_LV dev_type was found or not
1334

1335
  """
1336
  if disk.children:
1337
    for chdisk in disk.children:
1338
      if _RecursiveCheckIfLVMBased(chdisk):
1339
        return True
1340
  return disk.dev_type == constants.LD_LV
1341

    
1342

    
1343
class LUSetClusterParams(LogicalUnit):
1344
  """Change the parameters of the cluster.
1345

1346
  """
1347
  HPATH = "cluster-modify"
1348
  HTYPE = constants.HTYPE_CLUSTER
1349
  _OP_REQP = []
1350
  REQ_BGL = False
1351

    
1352
  def CheckParameters(self):
1353
    """Check parameters
1354

1355
    """
1356
    if not hasattr(self.op, "candidate_pool_size"):
1357
      self.op.candidate_pool_size = None
1358
    if self.op.candidate_pool_size is not None:
1359
      try:
1360
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1361
      except ValueError, err:
1362
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1363
                                   str(err))
1364
      if self.op.candidate_pool_size < 1:
1365
        raise errors.OpPrereqError("At least one master candidate needed")
1366

    
1367
  def ExpandNames(self):
1368
    # FIXME: in the future maybe other cluster params won't require checking on
1369
    # all nodes to be modified.
1370
    self.needed_locks = {
1371
      locking.LEVEL_NODE: locking.ALL_SET,
1372
    }
1373
    self.share_locks[locking.LEVEL_NODE] = 1
1374

    
1375
  def BuildHooksEnv(self):
1376
    """Build hooks env.
1377

1378
    """
1379
    env = {
1380
      "OP_TARGET": self.cfg.GetClusterName(),
1381
      "NEW_VG_NAME": self.op.vg_name,
1382
      }
1383
    mn = self.cfg.GetMasterNode()
1384
    return env, [mn], [mn]
1385

    
1386
  def CheckPrereq(self):
1387
    """Check prerequisites.
1388

1389
    This checks that the given parameters don't conflict and that
1390
    the given volume group is valid.
1391

1392
    """
1393
    # FIXME: This only works because there is only one parameter that can be
1394
    # changed or removed.
1395
    if self.op.vg_name is not None and not self.op.vg_name:
1396
      instances = self.cfg.GetAllInstancesInfo().values()
1397
      for inst in instances:
1398
        for disk in inst.disks:
1399
          if _RecursiveCheckIfLVMBased(disk):
1400
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1401
                                       " lvm-based instances exist")
1402

    
1403
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1404

    
1405
    # if vg_name is not None, check the given volume group on all nodes
1406
    if self.op.vg_name:
1407
      vglist = self.rpc.call_vg_list(node_list)
1408
      for node in node_list:
1409
        if vglist[node].failed:
1410
          # ignoring down node
1411
          self.LogWarning("Node %s unreachable/error, ignoring" % node)
1412
          continue
1413
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
1414
                                              self.op.vg_name,
1415
                                              constants.MIN_VG_SIZE)
1416
        if vgstatus:
1417
          raise errors.OpPrereqError("Error on node '%s': %s" %
1418
                                     (node, vgstatus))
1419

    
1420
    self.cluster = cluster = self.cfg.GetClusterInfo()
1421
    # validate beparams changes
1422
    if self.op.beparams:
1423
      utils.CheckBEParams(self.op.beparams)
1424
      self.new_beparams = cluster.FillDict(
1425
        cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
1426

    
1427
    # hypervisor list/parameters
1428
    self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
1429
    if self.op.hvparams:
1430
      if not isinstance(self.op.hvparams, dict):
1431
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1432
      for hv_name, hv_dict in self.op.hvparams.items():
1433
        if hv_name not in self.new_hvparams:
1434
          self.new_hvparams[hv_name] = hv_dict
1435
        else:
1436
          self.new_hvparams[hv_name].update(hv_dict)
1437

    
1438
    if self.op.enabled_hypervisors is not None:
1439
      self.hv_list = self.op.enabled_hypervisors
1440
    else:
1441
      self.hv_list = cluster.enabled_hypervisors
1442

    
1443
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1444
      # either the enabled list has changed, or the parameters have, validate
1445
      for hv_name, hv_params in self.new_hvparams.items():
1446
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1447
            (self.op.enabled_hypervisors and
1448
             hv_name in self.op.enabled_hypervisors)):
1449
          # either this is a new hypervisor, or its parameters have changed
1450
          hv_class = hypervisor.GetHypervisor(hv_name)
1451
          hv_class.CheckParameterSyntax(hv_params)
1452
          _CheckHVParams(self, node_list, hv_name, hv_params)
1453

    
1454
  def Exec(self, feedback_fn):
1455
    """Change the parameters of the cluster.
1456

1457
    """
1458
    if self.op.vg_name is not None:
1459
      if self.op.vg_name != self.cfg.GetVGName():
1460
        self.cfg.SetVGName(self.op.vg_name)
1461
      else:
1462
        feedback_fn("Cluster LVM configuration already in desired"
1463
                    " state, not changing")
1464
    if self.op.hvparams:
1465
      self.cluster.hvparams = self.new_hvparams
1466
    if self.op.enabled_hypervisors is not None:
1467
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1468
    if self.op.beparams:
1469
      self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
1470
    if self.op.candidate_pool_size is not None:
1471
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1472

    
1473
    self.cfg.Update(self.cluster)
1474

    
1475
    # we want to update nodes after the cluster so that if any errors
1476
    # happen, we have recorded and saved the cluster info
1477
    if self.op.candidate_pool_size is not None:
1478
      _AdjustCandidatePool(self)
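

# The hypervisor parameter handling in the cluster-parameter LU above copies
# the cluster-wide defaults and then merges each per-hypervisor override dict
# into the copy with dict.update.  The helper below is an illustrative,
# hypothetical sketch of that merge on plain dictionaries; it is not called
# anywhere in this module and is not part of the Ganeti API.
def _ExampleMergeHvParams(cluster_hvparams, op_hvparams):
  """Sketch: merge per-hypervisor overrides into copies of the defaults.

  Purely illustrative; mirrors the update() pattern used in CheckPrereq
  above, but without the objects.Cluster wrapper.

  """
  merged = dict((hv_name, params.copy())
                for hv_name, params in cluster_hvparams.items())
  for hv_name, hv_dict in op_hvparams.items():
    if hv_name not in merged:
      merged[hv_name] = dict(hv_dict)
    else:
      merged[hv_name].update(hv_dict)
  return merged

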
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo())

1508
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1509
  """Sleep and poll for an instance's disk to sync.
1510

1511
  """
1512
  if not instance.disks:
1513
    return True
1514

    
1515
  if not oneshot:
1516
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1517

    
1518
  node = instance.primary_node
1519

    
1520
  for dev in instance.disks:
1521
    lu.cfg.SetDiskID(dev, node)
1522

    
1523
  retries = 0
1524
  while True:
1525
    max_time = 0
1526
    done = True
1527
    cumul_degraded = False
1528
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1529
    if rstats.failed or not rstats.data:
1530
      lu.LogWarning("Can't get any data from node %s", node)
1531
      retries += 1
1532
      if retries >= 10:
1533
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1534
                                 " aborting." % node)
1535
      time.sleep(6)
1536
      continue
1537
    rstats = rstats.data
1538
    retries = 0
1539
    for i, mstat in enumerate(rstats):
1540
      if mstat is None:
1541
        lu.LogWarning("Can't compute data for node %s/%s",
1542
                           node, instance.disks[i].iv_name)
1543
        continue
1544
      # we ignore the ldisk parameter
1545
      perc_done, est_time, is_degraded, _ = mstat
1546
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1547
      if perc_done is not None:
1548
        done = False
1549
        if est_time is not None:
1550
          rem_time = "%d estimated seconds remaining" % est_time
1551
          max_time = est_time
1552
        else:
1553
          rem_time = "no time estimate"
1554
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1555
                        (instance.disks[i].iv_name, perc_done, rem_time))
1556
    if done or oneshot:
1557
      break
1558

    
1559
    time.sleep(min(60, max_time))
1560

    
1561
  if done:
1562
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1563
  return not cumul_degraded
1564

    
1565

    
1566
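# _WaitForSync above interprets each mirror status returned by
# call_blockdev_getmirrorstatus as a (perc_done, est_time, is_degraded,
# ldisk) tuple: a device is fully synced once perc_done is None, and it only
# counts towards the degraded result while no resync is in progress.  The
# helper below is a hypothetical, self-contained sketch of that
# interpretation; it is not called anywhere in this module.
def _ExampleSummarizeSyncStatus(mirror_stats):
  """Sketch: summarise a list of (perc_done, est_time, is_degraded, ldisk).

  Returns (done, degraded, max_est_time) using the same rules as
  _WaitForSync; purely illustrative.

  """
  done = True
  degraded = False
  max_est_time = 0
  for perc_done, est_time, is_degraded, _ in mirror_stats:
    degraded = degraded or (is_degraded and perc_done is None)
    if perc_done is not None:
      done = False
      if est_time is not None:
        max_est_time = max(max_est_time, est_time)
  return done, degraded, max_est_time

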
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1567
  """Check that mirrors are not degraded.
1568

1569
  The ldisk parameter, if True, will change the test from the
1570
  is_degraded attribute (which represents overall non-ok status for
1571
  the device(s)) to the ldisk (representing the local storage status).
1572

1573
  """
1574
  lu.cfg.SetDiskID(dev, node)
1575
  if ldisk:
1576
    idx = 6
1577
  else:
1578
    idx = 5
1579

    
1580
  result = True
1581
  if on_primary or dev.AssembleOnSecondary():
1582
    rstats = lu.rpc.call_blockdev_find(node, dev)
1583
    msg = rstats.RemoteFailMsg()
1584
    if msg:
1585
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1586
      result = False
1587
    elif not rstats.payload:
1588
      lu.LogWarning("Can't find disk on node %s", node)
1589
      result = False
1590
    else:
1591
      result = result and (not rstats.payload[idx])
1592
  if dev.children:
1593
    for child in dev.children:
1594
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1595

    
1596
  return result
1597

    
1598

    
1599
class LUDiagnoseOS(NoHooksLU):
1600
  """Logical unit for OS diagnose/query.
1601

1602
  """
1603
  _OP_REQP = ["output_fields", "names"]
1604
  REQ_BGL = False
1605
  _FIELDS_STATIC = utils.FieldSet()
1606
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1607

    
1608
  def ExpandNames(self):
1609
    if self.op.names:
1610
      raise errors.OpPrereqError("Selective OS query not supported")
1611

    
1612
    _CheckOutputFields(static=self._FIELDS_STATIC,
1613
                       dynamic=self._FIELDS_DYNAMIC,
1614
                       selected=self.op.output_fields)
1615

    
1616
    # Lock all nodes, in shared mode
1617
    self.needed_locks = {}
1618
    self.share_locks[locking.LEVEL_NODE] = 1
1619
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1620

    
1621
  def CheckPrereq(self):
1622
    """Check prerequisites.
1623

1624
    """
1625

    
1626
  @staticmethod
1627
  def _DiagnoseByOS(node_list, rlist):
1628
    """Remaps a per-node return list into an a per-os per-node dictionary
1629

1630
    @param node_list: a list with the names of all nodes
1631
    @param rlist: a map with node names as keys and OS objects as values
1632

1633
    @rtype: dict
1634
    @return: a dictionary with osnames as keys and as value another map, with
1635
        nodes as keys and list of OS objects as values, eg::
1636

1637
          {"debian-etch": {"node1": [<object>,...],
1638
                           "node2": [<object>,]}
1639
          }
1640

1641
    """
1642
    all_os = {}
1643
    for node_name, nr in rlist.iteritems():
1644
      if nr.failed or not nr.data:
1645
        continue
1646
      for os_obj in nr.data:
1647
        if os_obj.name not in all_os:
1648
          # build a list of nodes for this os containing empty lists
1649
          # for each node in node_list
1650
          all_os[os_obj.name] = {}
1651
          for nname in node_list:
1652
            all_os[os_obj.name][nname] = []
1653
        all_os[os_obj.name][node_name].append(os_obj)
1654
    return all_os
1655

    
1656
  def Exec(self, feedback_fn):
1657
    """Compute the list of OSes.
1658

1659
    """
1660
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1661
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()
1662
                   if node in node_list]
1663
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1664
    if node_data == False:
1665
      raise errors.OpExecError("Can't gather the list of OSes")
1666
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1667
    output = []
1668
    for os_name, os_data in pol.iteritems():
1669
      row = []
1670
      for field in self.op.output_fields:
1671
        if field == "name":
1672
          val = os_name
1673
        elif field == "valid":
1674
          val = utils.all([osl and osl[0] for osl in os_data.values()])
1675
        elif field == "node_status":
1676
          val = {}
1677
          for node_name, nos_list in os_data.iteritems():
1678
            val[node_name] = [(v.status, v.path) for v in nos_list]
1679
        else:
1680
          raise errors.ParameterError(field)
1681
        row.append(val)
1682
      output.append(row)
1683

    
1684
    return output
1685

    
1686

    
1687
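# LUDiagnoseOS._DiagnoseByOS above inverts the per-node RPC results into a
# per-OS, per-node dictionary.  The function below is an illustrative,
# hypothetical sketch of the same inversion on plain lists of OS names,
# without the RPC result wrappers; it is not used by any LU.
def _ExampleRemapByOS(node_list, names_by_node):
  """Sketch: remap {node: [os_name, ...]} into {os_name: {node: count}}.

  Every OS seen on any node gets an entry for every node in node_list, so
  OSes missing from a node show up with a count of zero; purely
  illustrative.

  """
  all_os = {}
  for node_name, os_names in names_by_node.items():
    for os_name in os_names:
      if os_name not in all_os:
        all_os[os_name] = dict((nname, 0) for nname in node_list)
      all_os[os_name][node_name] += 1
  return all_os

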
class LURemoveNode(LogicalUnit):
1688
  """Logical unit for removing a node.
1689

1690
  """
1691
  HPATH = "node-remove"
1692
  HTYPE = constants.HTYPE_NODE
1693
  _OP_REQP = ["node_name"]
1694

    
1695
  def BuildHooksEnv(self):
1696
    """Build hooks env.
1697

1698
    This doesn't run on the target node in the pre phase as a failed
1699
    node would then be impossible to remove.
1700

1701
    """
1702
    env = {
1703
      "OP_TARGET": self.op.node_name,
1704
      "NODE_NAME": self.op.node_name,
1705
      }
1706
    all_nodes = self.cfg.GetNodeList()
1707
    all_nodes.remove(self.op.node_name)
1708
    return env, all_nodes, all_nodes
1709

    
1710
  def CheckPrereq(self):
1711
    """Check prerequisites.
1712

1713
    This checks:
1714
     - the node exists in the configuration
1715
     - it does not have primary or secondary instances
1716
     - it's not the master
1717

1718
    Any errors are signalled by raising errors.OpPrereqError.
1719

1720
    """
1721
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1722
    if node is None:
1723
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1724

    
1725
    instance_list = self.cfg.GetInstanceList()
1726

    
1727
    masternode = self.cfg.GetMasterNode()
1728
    if node.name == masternode:
1729
      raise errors.OpPrereqError("Node is the master node,"
1730
                                 " you need to failover first.")
1731

    
1732
    for instance_name in instance_list:
1733
      instance = self.cfg.GetInstanceInfo(instance_name)
1734
      if node.name in instance.all_nodes:
1735
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1736
                                   " please remove first." % instance_name)
1737
    self.op.node_name = node.name
1738
    self.node = node
1739

    
1740
  def Exec(self, feedback_fn):
1741
    """Removes the node from the cluster.
1742

1743
    """
1744
    node = self.node
1745
    logging.info("Stopping the node daemon and removing configs from node %s",
1746
                 node.name)
1747

    
1748
    self.context.RemoveNode(node.name)
1749

    
1750
    self.rpc.call_node_leave_cluster(node.name)
1751

    
1752
    # Promote nodes to master candidate as needed
1753
    _AdjustCandidatePool(self)
1754

    
1755

    
1756
class LUQueryNodes(NoHooksLU):
1757
  """Logical unit for querying nodes.
1758

1759
  """
1760
  _OP_REQP = ["output_fields", "names", "use_locking"]
1761
  REQ_BGL = False
1762
  _FIELDS_DYNAMIC = utils.FieldSet(
1763
    "dtotal", "dfree",
1764
    "mtotal", "mnode", "mfree",
1765
    "bootid",
1766
    "ctotal", "cnodes", "csockets",
1767
    )
1768

    
1769
  _FIELDS_STATIC = utils.FieldSet(
1770
    "name", "pinst_cnt", "sinst_cnt",
1771
    "pinst_list", "sinst_list",
1772
    "pip", "sip", "tags",
1773
    "serial_no",
1774
    "master_candidate",
1775
    "master",
1776
    "offline",
1777
    )
1778

    
1779
  def ExpandNames(self):
1780
    _CheckOutputFields(static=self._FIELDS_STATIC,
1781
                       dynamic=self._FIELDS_DYNAMIC,
1782
                       selected=self.op.output_fields)
1783

    
1784
    self.needed_locks = {}
1785
    self.share_locks[locking.LEVEL_NODE] = 1
1786

    
1787
    if self.op.names:
1788
      self.wanted = _GetWantedNodes(self, self.op.names)
1789
    else:
1790
      self.wanted = locking.ALL_SET
1791

    
1792
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1793
    self.do_locking = self.do_node_query and self.op.use_locking
1794
    if self.do_locking:
1795
      # if we don't request only static fields, we need to lock the nodes
1796
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1797

    
1798

    
1799
  def CheckPrereq(self):
1800
    """Check prerequisites.
1801

1802
    """
1803
    # The validation of the node list is done in the _GetWantedNodes,
1804
    # if non empty, and if empty, there's no validation to do
1805
    pass
1806

    
1807
  def Exec(self, feedback_fn):
1808
    """Computes the list of nodes and their attributes.
1809

1810
    """
1811
    all_info = self.cfg.GetAllNodesInfo()
1812
    if self.do_locking:
1813
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
1814
    elif self.wanted != locking.ALL_SET:
1815
      nodenames = self.wanted
1816
      missing = set(nodenames).difference(all_info.keys())
1817
      if missing:
1818
        raise errors.OpExecError(
1819
          "Some nodes were removed before retrieving their data: %s" % missing)
1820
    else:
1821
      nodenames = all_info.keys()
1822

    
1823
    nodenames = utils.NiceSort(nodenames)
1824
    nodelist = [all_info[name] for name in nodenames]
1825

    
1826
    # begin data gathering
1827

    
1828
    if self.do_node_query:
1829
      live_data = {}
1830
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
1831
                                          self.cfg.GetHypervisorType())
1832
      for name in nodenames:
1833
        nodeinfo = node_data[name]
1834
        if not nodeinfo.failed and nodeinfo.data:
1835
          nodeinfo = nodeinfo.data
1836
          fn = utils.TryConvert
1837
          live_data[name] = {
1838
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
1839
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
1840
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
1841
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
1842
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
1843
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
1844
            "bootid": nodeinfo.get('bootid', None),
1845
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
1846
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
1847
            }
1848
        else:
1849
          live_data[name] = {}
1850
    else:
1851
      live_data = dict.fromkeys(nodenames, {})
1852

    
1853
    node_to_primary = dict([(name, set()) for name in nodenames])
1854
    node_to_secondary = dict([(name, set()) for name in nodenames])
1855

    
1856
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
1857
                             "sinst_cnt", "sinst_list"))
1858
    if inst_fields & frozenset(self.op.output_fields):
1859
      instancelist = self.cfg.GetInstanceList()
1860

    
1861
      for instance_name in instancelist:
1862
        inst = self.cfg.GetInstanceInfo(instance_name)
1863
        if inst.primary_node in node_to_primary:
1864
          node_to_primary[inst.primary_node].add(inst.name)
1865
        for secnode in inst.secondary_nodes:
1866
          if secnode in node_to_secondary:
1867
            node_to_secondary[secnode].add(inst.name)
1868

    
1869
    master_node = self.cfg.GetMasterNode()
1870

    
1871
    # end data gathering
1872

    
1873
    output = []
1874
    for node in nodelist:
1875
      node_output = []
1876
      for field in self.op.output_fields:
1877
        if field == "name":
1878
          val = node.name
1879
        elif field == "pinst_list":
1880
          val = list(node_to_primary[node.name])
1881
        elif field == "sinst_list":
1882
          val = list(node_to_secondary[node.name])
1883
        elif field == "pinst_cnt":
1884
          val = len(node_to_primary[node.name])
1885
        elif field == "sinst_cnt":
1886
          val = len(node_to_secondary[node.name])
1887
        elif field == "pip":
1888
          val = node.primary_ip
1889
        elif field == "sip":
1890
          val = node.secondary_ip
1891
        elif field == "tags":
1892
          val = list(node.GetTags())
1893
        elif field == "serial_no":
1894
          val = node.serial_no
1895
        elif field == "master_candidate":
1896
          val = node.master_candidate
1897
        elif field == "master":
1898
          val = node.name == master_node
1899
        elif field == "offline":
1900
          val = node.offline
1901
        elif self._FIELDS_DYNAMIC.Matches(field):
1902
          val = live_data[node.name].get(field, None)
1903
        else:
1904
          raise errors.ParameterError(field)
1905
        node_output.append(val)
1906
      output.append(node_output)
1907

    
1908
    return output
1909

    
1910

    
1911
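# LUQueryNodes.Exec above builds reverse mappings from node names to the
# instances using them as primary or secondary node, as dictionaries of
# sets.  Below is a small, hypothetical sketch of that construction on plain
# (name, primary, secondaries) tuples; it is not used by any LU.
def _ExampleMapNodesToInstances(node_names, instances):
  """Sketch: build node-to-primary and node-to-secondary instance maps.

  instances is an iterable of (name, primary_node, secondary_nodes)
  tuples; purely illustrative.

  """
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for inst_name, primary_node, secondary_nodes in instances:
    if primary_node in node_to_primary:
      node_to_primary[primary_node].add(inst_name)
    for secnode in secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst_name)
  return node_to_primary, node_to_secondary

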
class LUQueryNodeVolumes(NoHooksLU):
1912
  """Logical unit for getting volumes on node(s).
1913

1914
  """
1915
  _OP_REQP = ["nodes", "output_fields"]
1916
  REQ_BGL = False
1917
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
1918
  _FIELDS_STATIC = utils.FieldSet("node")
1919

    
1920
  def ExpandNames(self):
1921
    _CheckOutputFields(static=self._FIELDS_STATIC,
1922
                       dynamic=self._FIELDS_DYNAMIC,
1923
                       selected=self.op.output_fields)
1924

    
1925
    self.needed_locks = {}
1926
    self.share_locks[locking.LEVEL_NODE] = 1
1927
    if not self.op.nodes:
1928
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1929
    else:
1930
      self.needed_locks[locking.LEVEL_NODE] = \
1931
        _GetWantedNodes(self, self.op.nodes)
1932

    
1933
  def CheckPrereq(self):
1934
    """Check prerequisites.
1935

1936
    This checks that the fields required are valid output fields.
1937

1938
    """
1939
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
1940

    
1941
  def Exec(self, feedback_fn):
1942
    """Computes the list of nodes and their attributes.
1943

1944
    """
1945
    nodenames = self.nodes
1946
    volumes = self.rpc.call_node_volumes(nodenames)
1947

    
1948
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
1949
             in self.cfg.GetInstanceList()]
1950

    
1951
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
1952

    
1953
    output = []
1954
    for node in nodenames:
1955
      if node not in volumes or volumes[node].failed or not volumes[node].data:
1956
        continue
1957

    
1958
      node_vols = volumes[node].data[:]
1959
      node_vols.sort(key=lambda vol: vol['dev'])
1960

    
1961
      for vol in node_vols:
1962
        node_output = []
1963
        for field in self.op.output_fields:
1964
          if field == "node":
1965
            val = node
1966
          elif field == "phys":
1967
            val = vol['dev']
1968
          elif field == "vg":
1969
            val = vol['vg']
1970
          elif field == "name":
1971
            val = vol['name']
1972
          elif field == "size":
1973
            val = int(float(vol['size']))
1974
          elif field == "instance":
1975
            for inst in ilist:
1976
              if node not in lv_by_node[inst]:
1977
                continue
1978
              if vol['name'] in lv_by_node[inst][node]:
1979
                val = inst.name
1980
                break
1981
            else:
1982
              val = '-'
1983
          else:
1984
            raise errors.ParameterError(field)
1985
          node_output.append(str(val))
1986

    
1987
        output.append(node_output)
1988

    
1989
    return output
1990

    
1991

    
1992
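# The "instance" output field in LUQueryNodeVolumes.Exec above is resolved by
# checking which instance maps the given logical volume on that node.  The
# helper below sketches that lookup with plain dictionaries instead of
# Instance objects; names and structure are hypothetical and it is not used
# by any LU.
def _ExampleFindVolumeOwner(node, lv_name, lvs_by_instance):
  """Sketch: find the instance owning lv_name on the given node.

  lvs_by_instance maps instance name -> {node: [lv names]}; returns the
  owning instance name or "-" if none matches, mirroring the for/else
  construct above.

  """
  for inst_name, node_map in lvs_by_instance.items():
    if node in node_map and lv_name in node_map[node]:
      return inst_name
  return "-"

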
class LUAddNode(LogicalUnit):
1993
  """Logical unit for adding node to the cluster.
1994

1995
  """
1996
  HPATH = "node-add"
1997
  HTYPE = constants.HTYPE_NODE
1998
  _OP_REQP = ["node_name"]
1999

    
2000
  def BuildHooksEnv(self):
2001
    """Build hooks env.
2002

2003
    This will run on all nodes before, and on all nodes + the new node after.
2004

2005
    """
2006
    env = {
2007
      "OP_TARGET": self.op.node_name,
2008
      "NODE_NAME": self.op.node_name,
2009
      "NODE_PIP": self.op.primary_ip,
2010
      "NODE_SIP": self.op.secondary_ip,
2011
      }
2012
    nodes_0 = self.cfg.GetNodeList()
2013
    nodes_1 = nodes_0 + [self.op.node_name, ]
2014
    return env, nodes_0, nodes_1
2015

    
2016
  def CheckPrereq(self):
2017
    """Check prerequisites.
2018

2019
    This checks:
2020
     - the new node is not already in the config
2021
     - it is resolvable
2022
     - its parameters (single/dual homed) match the cluster
2023

2024
    Any errors are signalled by raising errors.OpPrereqError.
2025

2026
    """
2027
    node_name = self.op.node_name
2028
    cfg = self.cfg
2029

    
2030
    dns_data = utils.HostInfo(node_name)
2031

    
2032
    node = dns_data.name
2033
    primary_ip = self.op.primary_ip = dns_data.ip
2034
    secondary_ip = getattr(self.op, "secondary_ip", None)
2035
    if secondary_ip is None:
2036
      secondary_ip = primary_ip
2037
    if not utils.IsValidIP(secondary_ip):
2038
      raise errors.OpPrereqError("Invalid secondary IP given")
2039
    self.op.secondary_ip = secondary_ip
2040

    
2041
    node_list = cfg.GetNodeList()
2042
    if not self.op.readd and node in node_list:
2043
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2044
                                 node)
2045
    elif self.op.readd and node not in node_list:
2046
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2047

    
2048
    for existing_node_name in node_list:
2049
      existing_node = cfg.GetNodeInfo(existing_node_name)
2050

    
2051
      if self.op.readd and node == existing_node_name:
2052
        if (existing_node.primary_ip != primary_ip or
2053
            existing_node.secondary_ip != secondary_ip):
2054
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2055
                                     " address configuration as before")
2056
        continue
2057

    
2058
      if (existing_node.primary_ip == primary_ip or
2059
          existing_node.secondary_ip == primary_ip or
2060
          existing_node.primary_ip == secondary_ip or
2061
          existing_node.secondary_ip == secondary_ip):
2062
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2063
                                   " existing node %s" % existing_node.name)
2064

    
2065
    # check that the type of the node (single versus dual homed) is the
2066
    # same as for the master
2067
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2068
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2069
    newbie_singlehomed = secondary_ip == primary_ip
2070
    if master_singlehomed != newbie_singlehomed:
2071
      if master_singlehomed:
2072
        raise errors.OpPrereqError("The master has no private ip but the"
2073
                                   " new node has one")
2074
      else:
2075
        raise errors.OpPrereqError("The master has a private ip but the"
2076
                                   " new node doesn't have one")
2077

    
2078
    # check reachability
2079
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2080
      raise errors.OpPrereqError("Node not reachable by ping")
2081

    
2082
    if not newbie_singlehomed:
2083
      # check reachability from my secondary ip to newbie's secondary ip
2084
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2085
                           source=myself.secondary_ip):
2086
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2087
                                   " based ping to noded port")
2088

    
2089
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2090
    mc_now, _ = self.cfg.GetMasterCandidateStats()
2091
    master_candidate = mc_now < cp_size
2092

    
2093
    self.new_node = objects.Node(name=node,
2094
                                 primary_ip=primary_ip,
2095
                                 secondary_ip=secondary_ip,
2096
                                 master_candidate=master_candidate,
2097
                                 offline=False)
2098

    
2099
  def Exec(self, feedback_fn):
2100
    """Adds the new node to the cluster.
2101

2102
    """
2103
    new_node = self.new_node
2104
    node = new_node.name
2105

    
2106
    # check connectivity
2107
    result = self.rpc.call_version([node])[node]
2108
    result.Raise()
2109
    if result.data:
2110
      if constants.PROTOCOL_VERSION == result.data:
2111
        logging.info("Communication to node %s fine, sw version %s match",
2112
                     node, result.data)
2113
      else:
2114
        raise errors.OpExecError("Version mismatch master version %s,"
2115
                                 " node version %s" %
2116
                                 (constants.PROTOCOL_VERSION, result.data))
2117
    else:
2118
      raise errors.OpExecError("Cannot get version from the new node")
2119

    
2120
    # setup ssh on node
2121
    logging.info("Copy ssh key to node %s", node)
2122
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2123
    keyarray = []
2124
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2125
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2126
                priv_key, pub_key]
2127

    
2128
    for i in keyfiles:
2129
      f = open(i, 'r')
2130
      try:
2131
        keyarray.append(f.read())
2132
      finally:
2133
        f.close()
2134

    
2135
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2136
                                    keyarray[2],
2137
                                    keyarray[3], keyarray[4], keyarray[5])
2138

    
2139
    msg = result.RemoteFailMsg()
2140
    if msg:
2141
      raise errors.OpExecError("Cannot transfer ssh keys to the"
2142
                               " new node: %s" % msg)
2143

    
2144
    # Add node to our /etc/hosts, and add key to known_hosts
2145
    utils.AddHostToEtcHosts(new_node.name)
2146

    
2147
    if new_node.secondary_ip != new_node.primary_ip:
2148
      result = self.rpc.call_node_has_ip_address(new_node.name,
2149
                                                 new_node.secondary_ip)
2150
      if result.failed or not result.data:
2151
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2152
                                 " you gave (%s). Please fix and re-run this"
2153
                                 " command." % new_node.secondary_ip)
2154

    
2155
    node_verify_list = [self.cfg.GetMasterNode()]
2156
    node_verify_param = {
2157
      'nodelist': [node],
2158
      # TODO: do a node-net-test as well?
2159
    }
2160

    
2161
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2162
                                       self.cfg.GetClusterName())
2163
    for verifier in node_verify_list:
2164
      if result[verifier].failed or not result[verifier].data:
2165
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
2166
                                 " for remote verification" % verifier)
2167
      if result[verifier].data['nodelist']:
2168
        for failed in result[verifier].data['nodelist']:
2169
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2170
                      (verifier, result[verifier].data['nodelist'][failed]))
2171
        raise errors.OpExecError("ssh/hostname verification failed.")
2172

    
2173
    # Distribute updated /etc/hosts and known_hosts to all nodes,
2174
    # including the node just added
2175
    myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
2176
    dist_nodes = self.cfg.GetNodeList()
2177
    if not self.op.readd:
2178
      dist_nodes.append(node)
2179
    if myself.name in dist_nodes:
2180
      dist_nodes.remove(myself.name)
2181

    
2182
    logging.debug("Copying hosts and known_hosts to all nodes")
2183
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
2184
      result = self.rpc.call_upload_file(dist_nodes, fname)
2185
      for to_node, to_result in result.iteritems():
2186
        if to_result.failed or not to_result.data:
2187
          logging.error("Copy of file %s to node %s failed", fname, to_node)
2188

    
2189
    to_copy = []
2190
    enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2191
    if constants.HTS_USE_VNC.intersection(enabled_hypervisors):
2192
      to_copy.append(constants.VNC_PASSWORD_FILE)
2193

    
2194
    for fname in to_copy:
2195
      result = self.rpc.call_upload_file([node], fname)
2196
      if result[node].failed or not result[node]:
2197
        logging.error("Could not copy file %s to node %s", fname, node)
2198

    
2199
    if self.op.readd:
2200
      self.context.ReaddNode(new_node)
2201
    else:
2202
      self.context.AddNode(new_node)
2203

    
2204

    
2205
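# LUAddNode.CheckPrereq above requires the new node to have the same
# "homedness" as the master: either both use a separate secondary IP or
# neither does.  The predicate below is a hypothetical sketch of that rule on
# plain IP strings; it is not used by any LU.
def _ExampleSameHomedness(master_primary_ip, master_secondary_ip,
                          new_primary_ip, new_secondary_ip):
  """Sketch: True if master and new node are both single- or dual-homed.

  A node counts as single-homed when its secondary IP equals its primary
  IP; purely illustrative.

  """
  master_singlehomed = master_secondary_ip == master_primary_ip
  newbie_singlehomed = new_secondary_ip == new_primary_ip
  return master_singlehomed == newbie_singlehomed

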
class LUSetNodeParams(LogicalUnit):
2206
  """Modifies the parameters of a node.
2207

2208
  """
2209
  HPATH = "node-modify"
2210
  HTYPE = constants.HTYPE_NODE
2211
  _OP_REQP = ["node_name"]
2212
  REQ_BGL = False
2213

    
2214
  def CheckArguments(self):
2215
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2216
    if node_name is None:
2217
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2218
    self.op.node_name = node_name
2219
    _CheckBooleanOpField(self.op, 'master_candidate')
2220
    _CheckBooleanOpField(self.op, 'offline')
2221
    if self.op.master_candidate is None and self.op.offline is None:
2222
      raise errors.OpPrereqError("Please pass at least one modification")
2223
    if self.op.offline == True and self.op.master_candidate == True:
2224
      raise errors.OpPrereqError("Can't set the node into offline and"
2225
                                 " master_candidate at the same time")
2226

    
2227
  def ExpandNames(self):
2228
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2229

    
2230
  def BuildHooksEnv(self):
2231
    """Build hooks env.
2232

2233
    This runs on the master node.
2234

2235
    """
2236
    env = {
2237
      "OP_TARGET": self.op.node_name,
2238
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2239
      "OFFLINE": str(self.op.offline),
2240
      }
2241
    nl = [self.cfg.GetMasterNode(),
2242
          self.op.node_name]
2243
    return env, nl, nl
2244

    
2245
  def CheckPrereq(self):
2246
    """Check prerequisites.
2247

2248
    This only checks the instance list against the existing names.
2249

2250
    """
2251
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2252

    
2253
    if ((self.op.master_candidate == False or self.op.offline == True)
2254
        and node.master_candidate):
2255
      # we will demote the node from master_candidate
2256
      if self.op.node_name == self.cfg.GetMasterNode():
2257
        raise errors.OpPrereqError("The master node has to be a"
2258
                                   " master candidate and online")
2259
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2260
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2261
      if num_candidates <= cp_size:
2262
        msg = ("Not enough master candidates (desired"
2263
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2264
        if self.op.force:
2265
          self.LogWarning(msg)
2266
        else:
2267
          raise errors.OpPrereqError(msg)
2268

    
2269
    if (self.op.master_candidate == True and node.offline and
2270
        not self.op.offline == False):
2271
      raise errors.OpPrereqError("Can't set an offline node to"
2272
                                 " master_candidate")
2273

    
2274
    return
2275

    
2276
  def Exec(self, feedback_fn):
2277
    """Modifies a node.
2278

2279
    """
2280
    node = self.node
2281

    
2282
    result = []
2283

    
2284
    if self.op.offline is not None:
2285
      node.offline = self.op.offline
2286
      result.append(("offline", str(self.op.offline)))
2287
      if self.op.offline == True and node.master_candidate:
2288
        node.master_candidate = False
2289
        result.append(("master_candidate", "auto-demotion due to offline"))
2290

    
2291
    if self.op.master_candidate is not None:
2292
      node.master_candidate = self.op.master_candidate
2293
      result.append(("master_candidate", str(self.op.master_candidate)))
2294
      if self.op.master_candidate == False:
2295
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2296
        msg = rrc.RemoteFailMsg()
2297
        if msg:
2298
          self.LogWarning("Node failed to demote itself: %s" % msg)
2299

    
2300
    # this will trigger configuration file update, if needed
2301
    self.cfg.Update(node)
2302
    # this will trigger job queue propagation or cleanup
2303
    if self.op.node_name != self.cfg.GetMasterNode():
2304
      self.context.ReaddNode(node)
2305

    
2306
    return result
2307

    
2308

    
2309
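# When demoting a master candidate, LUSetNodeParams.CheckPrereq above
# compares the current number of candidates with the configured pool size
# and either warns (when forced) or refuses.  The helper below sketches that
# decision as a pure function; the symbolic return values are hypothetical
# and it is not used by any LU.
def _ExampleCandidatePoolCheck(num_candidates, pool_size, force):
  """Sketch: classify the demotion of one master candidate.

  Returns "ok" when enough candidates remain, "warn" when forced below the
  pool size, "refuse" otherwise; purely illustrative.

  """
  if num_candidates > pool_size:
    return "ok"
  if force:
    return "warn"
  return "refuse"

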
class LUQueryClusterInfo(NoHooksLU):
2310
  """Query cluster configuration.
2311

2312
  """
2313
  _OP_REQP = []
2314
  REQ_BGL = False
2315

    
2316
  def ExpandNames(self):
2317
    self.needed_locks = {}
2318

    
2319
  def CheckPrereq(self):
2320
    """No prerequsites needed for this LU.
2321

2322
    """
2323
    pass
2324

    
2325
  def Exec(self, feedback_fn):
2326
    """Return cluster config.
2327

2328
    """
2329
    cluster = self.cfg.GetClusterInfo()
2330
    result = {
2331
      "software_version": constants.RELEASE_VERSION,
2332
      "protocol_version": constants.PROTOCOL_VERSION,
2333
      "config_version": constants.CONFIG_VERSION,
2334
      "os_api_version": constants.OS_API_VERSION,
2335
      "export_version": constants.EXPORT_VERSION,
2336
      "architecture": (platform.architecture()[0], platform.machine()),
2337
      "name": cluster.cluster_name,
2338
      "master": cluster.master_node,
2339
      "default_hypervisor": cluster.default_hypervisor,
2340
      "enabled_hypervisors": cluster.enabled_hypervisors,
2341
      "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
2342
                        for hypervisor in cluster.enabled_hypervisors]),
2343
      "beparams": cluster.beparams,
2344
      "candidate_pool_size": cluster.candidate_pool_size,
2345
      }
2346

    
2347
    return result
2348

    
2349

    
2350
class LUQueryConfigValues(NoHooksLU):
2351
  """Return configuration values.
2352

2353
  """
2354
  _OP_REQP = []
2355
  REQ_BGL = False
2356
  _FIELDS_DYNAMIC = utils.FieldSet()
2357
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2358

    
2359
  def ExpandNames(self):
2360
    self.needed_locks = {}
2361

    
2362
    _CheckOutputFields(static=self._FIELDS_STATIC,
2363
                       dynamic=self._FIELDS_DYNAMIC,
2364
                       selected=self.op.output_fields)
2365

    
2366
  def CheckPrereq(self):
2367
    """No prerequisites.
2368

2369
    """
2370
    pass
2371

    
2372
  def Exec(self, feedback_fn):
2373
    """Dump a representation of the cluster config to the standard output.
2374

2375
    """
2376
    values = []
2377
    for field in self.op.output_fields:
2378
      if field == "cluster_name":
2379
        entry = self.cfg.GetClusterName()
2380
      elif field == "master_node":
2381
        entry = self.cfg.GetMasterNode()
2382
      elif field == "drain_flag":
2383
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2384
      else:
2385
        raise errors.ParameterError(field)
2386
      values.append(entry)
2387
    return values
2388

    
2389

    
2390
class LUActivateInstanceDisks(NoHooksLU):
2391
  """Bring up an instance's disks.
2392

2393
  """
2394
  _OP_REQP = ["instance_name"]
2395
  REQ_BGL = False
2396

    
2397
  def ExpandNames(self):
2398
    self._ExpandAndLockInstance()
2399
    self.needed_locks[locking.LEVEL_NODE] = []
2400
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2401

    
2402
  def DeclareLocks(self, level):
2403
    if level == locking.LEVEL_NODE:
2404
      self._LockInstancesNodes()
2405

    
2406
  def CheckPrereq(self):
2407
    """Check prerequisites.
2408

2409
    This checks that the instance is in the cluster.
2410

2411
    """
2412
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2413
    assert self.instance is not None, \
2414
      "Cannot retrieve locked instance %s" % self.op.instance_name
2415
    _CheckNodeOnline(self, self.instance.primary_node)
2416

    
2417
  def Exec(self, feedback_fn):
2418
    """Activate the disks.
2419

2420
    """
2421
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2422
    if not disks_ok:
2423
      raise errors.OpExecError("Cannot activate block devices")
2424

    
2425
    return disks_info
2426

    
2427

    
2428
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2429
  """Prepare the block devices for an instance.
2430

2431
  This sets up the block devices on all nodes.
2432

2433
  @type lu: L{LogicalUnit}
2434
  @param lu: the logical unit on whose behalf we execute
2435
  @type instance: L{objects.Instance}
2436
  @param instance: the instance for whose disks we assemble
2437
  @type ignore_secondaries: boolean
2438
  @param ignore_secondaries: if true, errors on secondary nodes
2439
      won't result in an error return from the function
2440
  @return: a (disks_ok, device_info) tuple, where device_info is a list
      of (host, instance_visible_name, node_visible_name) triples with
      the mapping from node devices to instance devices
2443

2444
  """
2445
  device_info = []
2446
  disks_ok = True
2447
  iname = instance.name
2448
  # With the two passes mechanism we try to reduce the window of
2449
  # opportunity for the race condition of switching DRBD to primary
2450
  # before handshaking occurred, but we do not eliminate it
2451

    
2452
  # The proper fix would be to wait (with some limits) until the
2453
  # connection has been made and drbd transitions from WFConnection
2454
  # into any other network-connected state (Connected, SyncTarget,
2455
  # SyncSource, etc.)
2456

    
2457
  # 1st pass, assemble on all nodes in secondary mode
2458
  for inst_disk in instance.disks:
2459
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2460
      lu.cfg.SetDiskID(node_disk, node)
2461
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2462
      msg = result.RemoteFailMsg()
2463
      if msg:
2464
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2465
                           " (is_primary=False, pass=1): %s",
2466
                           inst_disk.iv_name, node, msg)
2467
        if not ignore_secondaries:
2468
          disks_ok = False
2469

    
2470
  # FIXME: race condition on drbd migration to primary
2471

    
2472
  # 2nd pass, do only the primary node
2473
  for inst_disk in instance.disks:
2474
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2475
      if node != instance.primary_node:
2476
        continue
2477
      lu.cfg.SetDiskID(node_disk, node)
2478
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2479
      msg = result.RemoteFailMsg()
2480
      if msg:
2481
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2482
                           " (is_primary=True, pass=2): %s",
2483
                           inst_disk.iv_name, node, msg)
2484
        disks_ok = False
2485
    device_info.append((instance.primary_node, inst_disk.iv_name, result.data))
2486

    
2487
  # leave the disks configured for the primary node
2488
  # this is a workaround that would be fixed better by
2489
  # improving the logical/physical id handling
2490
  for disk in instance.disks:
2491
    lu.cfg.SetDiskID(disk, instance.primary_node)
2492

    
2493
  return disks_ok, device_info
2494

    
2495

    
2496
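# _AssembleInstanceDisks above activates every disk twice: a first pass in
# secondary (read-only) mode on all nodes of the disk tree, then a second
# pass in primary mode on the primary node only, narrowing the window in
# which DRBD could be promoted before both sides are connected.  The
# generator below is a hypothetical sketch of that call ordering; it is not
# used by any LU.
def _ExampleAssemblyOrder(primary_node, disk_nodes):
  """Sketch: yield (node, as_primary) pairs in two-pass assembly order.

  disk_nodes is the list of nodes holding the disk; purely illustrative.

  """
  # 1st pass: every node in secondary mode
  for node in disk_nodes:
    yield node, False
  # 2nd pass: only the primary node, in primary mode
  for node in disk_nodes:
    if node == primary_node:
      yield node, True

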
def _StartInstanceDisks(lu, instance, force):
2497
  """Start the disks of an instance.
2498

2499
  """
2500
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2501
                                           ignore_secondaries=force)
2502
  if not disks_ok:
2503
    _ShutdownInstanceDisks(lu, instance)
2504
    if force is not None and not force:
2505
      lu.proc.LogWarning("", hint="If the message above refers to a"
2506
                         " secondary node,"
2507
                         " you can retry the operation using '--force'.")
2508
    raise errors.OpExecError("Disk consistency error")
2509

    
2510

    
2511
class LUDeactivateInstanceDisks(NoHooksLU):
2512
  """Shutdown an instance's disks.
2513

2514
  """
2515
  _OP_REQP = ["instance_name"]
2516
  REQ_BGL = False
2517

    
2518
  def ExpandNames(self):
2519
    self._ExpandAndLockInstance()
2520
    self.needed_locks[locking.LEVEL_NODE] = []
2521
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2522

    
2523
  def DeclareLocks(self, level):
2524
    if level == locking.LEVEL_NODE:
2525
      self._LockInstancesNodes()
2526

    
2527
  def CheckPrereq(self):
2528
    """Check prerequisites.
2529

2530
    This checks that the instance is in the cluster.
2531

2532
    """
2533
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2534
    assert self.instance is not None, \
2535
      "Cannot retrieve locked instance %s" % self.op.instance_name
2536

    
2537
  def Exec(self, feedback_fn):
2538
    """Deactivate the disks
2539

2540
    """
2541
    instance = self.instance
2542
    _SafeShutdownInstanceDisks(self, instance)
2543

    
2544

    
2545
def _SafeShutdownInstanceDisks(lu, instance):
2546
  """Shutdown block devices of an instance.
2547

2548
  This function checks if an instance is running, before calling
2549
  _ShutdownInstanceDisks.
2550

2551
  """
2552
  ins_l = lu.rpc.call_instance_list([instance.primary_node],
2553
                                      [instance.hypervisor])
2554
  ins_l = ins_l[instance.primary_node]
2555
  if ins_l.failed or not isinstance(ins_l.data, list):
2556
    raise errors.OpExecError("Can't contact node '%s'" %
2557
                             instance.primary_node)
2558

    
2559
  if instance.name in ins_l.data:
2560
    raise errors.OpExecError("Instance is running, can't shutdown"
2561
                             " block devices.")
2562

    
2563
  _ShutdownInstanceDisks(lu, instance)
2564

    
2565

    
2566
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
2567
  """Shutdown block devices of an instance.
2568

2569
  This does the shutdown on all nodes of the instance.
2570

2571
  If ignore_primary is false, errors on the primary node are not
  ignored.
2573

2574
  """
2575
  all_result = True
2576
  for disk in instance.disks:
2577
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
2578
      lu.cfg.SetDiskID(top_disk, node)
2579
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
2580
      msg = result.RemoteFailMsg()
2581
      if msg:
2582
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
2583
                      disk.iv_name, node, msg)
2584
        if not ignore_primary or node != instance.primary_node:
2585
          all_result = False
2586
  return all_result
2587

    
2588

    
2589
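# _ShutdownInstanceDisks above aggregates the per-node shutdown results: a
# failure makes the overall result False unless it happened on the primary
# node and ignore_primary was set.  Below is a hypothetical sketch of that
# aggregation on plain (node, success) pairs; it is not used by any LU.
def _ExampleAggregateShutdown(results, primary_node, ignore_primary=False):
  """Sketch: aggregate per-node shutdown results.

  results is an iterable of (node, success) pairs; returns False if any
  failure counts, using the same rule as _ShutdownInstanceDisks.

  """
  all_result = True
  for node, success in results:
    if not success:
      if not ignore_primary or node != primary_node:
        all_result = False
  return all_result

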
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2590
  """Checks if a node has enough free memory.
2591

2592
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
2596

2597
  @type lu: C{LogicalUnit}
2598
  @param lu: a logical unit from which we get configuration data
2599
  @type node: C{str}
2600
  @param node: the node to check
2601
  @type reason: C{str}
2602
  @param reason: string to use in the error message
2603
  @type requested: C{int}
2604
  @param requested: the amount of memory in MiB to check for
2605
  @type hypervisor_name: C{str}
2606
  @param hypervisor_name: the hypervisor to ask for memory stats
2607
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2608
      we cannot check the node
2609

2610
  """
2611
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2612
  nodeinfo[node].Raise()
2613
  free_mem = nodeinfo[node].data.get('memory_free')
2614
  if not isinstance(free_mem, int):
2615
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2616
                             " was '%s'" % (node, free_mem))
2617
  if requested > free_mem:
2618
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2619
                             " needed %s MiB, available %s MiB" %
2620
                             (node, reason, requested, free_mem))
2621

    
2622

    
2623
class LUStartupInstance(LogicalUnit):
2624
  """Starts an instance.
2625

2626
  """
2627
  HPATH = "instance-start"
2628
  HTYPE = constants.HTYPE_INSTANCE
2629
  _OP_REQP = ["instance_name", "force"]
2630
  REQ_BGL = False
2631

    
2632
  def ExpandNames(self):
2633
    self._ExpandAndLockInstance()
2634

    
2635
  def BuildHooksEnv(self):
2636
    """Build hooks env.
2637

2638
    This runs on master, primary and secondary nodes of the instance.
2639

2640
    """
2641
    env = {
2642
      "FORCE": self.op.force,
2643
      }
2644
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2645
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2646
    return env, nl, nl
2647

    
2648
  def CheckPrereq(self):
2649
    """Check prerequisites.
2650

2651
    This checks that the instance is in the cluster.
2652

2653
    """
2654
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2655
    assert self.instance is not None, \
2656
      "Cannot retrieve locked instance %s" % self.op.instance_name
2657

    
2658
    _CheckNodeOnline(self, instance.primary_node)
2659

    
2660
    bep = self.cfg.GetClusterInfo().FillBE(instance)
2661
    # check bridge existence
2662
    _CheckInstanceBridgesExist(self, instance)
2663

    
2664
    _CheckNodeFreeMemory(self, instance.primary_node,
2665
                         "starting instance %s" % instance.name,
2666
                         bep[constants.BE_MEMORY], instance.hypervisor)
2667

    
2668
  def Exec(self, feedback_fn):
2669
    """Start the instance.
2670

2671
    """
2672
    instance = self.instance
2673
    force = self.op.force
2674
    extra_args = getattr(self.op, "extra_args", "")
2675

    
2676
    self.cfg.MarkInstanceUp(instance.name)
2677

    
2678
    node_current = instance.primary_node
2679

    
2680
    _StartInstanceDisks(self, instance, force)
2681

    
2682
    result = self.rpc.call_instance_start(node_current, instance, extra_args)
2683
    msg = result.RemoteFailMsg()
2684
    if msg:
2685
      _ShutdownInstanceDisks(self, instance)
2686
      raise errors.OpExecError("Could not start instance: %s" % msg)
2687

    
2688

    
2689
class LURebootInstance(LogicalUnit):
2690
  """Reboot an instance.
2691

2692
  """
2693
  HPATH = "instance-reboot"
2694
  HTYPE = constants.HTYPE_INSTANCE
2695
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2696
  REQ_BGL = False
2697

    
2698
  def ExpandNames(self):
2699
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2700
                                   constants.INSTANCE_REBOOT_HARD,
2701
                                   constants.INSTANCE_REBOOT_FULL]:
2702
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2703
                                  (constants.INSTANCE_REBOOT_SOFT,
2704
                                   constants.INSTANCE_REBOOT_HARD,
2705
                                   constants.INSTANCE_REBOOT_FULL))
2706
    self._ExpandAndLockInstance()
2707

    
2708
  def BuildHooksEnv(self):
2709
    """Build hooks env.
2710

2711
    This runs on master, primary and secondary nodes of the instance.
2712

2713
    """
2714
    env = {
2715
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2716
      }
2717
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
2718
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2719
    return env, nl, nl
2720

    
2721
  def CheckPrereq(self):
2722
    """Check prerequisites.
2723

2724
    This checks that the instance is in the cluster.
2725

2726
    """
2727
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2728
    assert self.instance is not None, \
2729
      "Cannot retrieve locked instance %s" % self.op.instance_name
2730

    
2731
    _CheckNodeOnline(self, instance.primary_node)
2732

    
2733
    # check bridges existance
2734
    _CheckInstanceBridgesExist(self, instance)
2735

    
2736
  def Exec(self, feedback_fn):
2737
    """Reboot the instance.
2738

2739
    """
2740
    instance = self.instance
2741
    ignore_secondaries = self.op.ignore_secondaries
2742
    reboot_type = self.op.reboot_type
2743
    extra_args = getattr(self.op, "extra_args", "")
2744

    
2745
    node_current = instance.primary_node
2746

    
2747
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
2748
                       constants.INSTANCE_REBOOT_HARD]:
2749
      result = self.rpc.call_instance_reboot(node_current, instance,
2750
                                             reboot_type, extra_args)
2751
      if result.failed or not result.data:
2752
        raise errors.OpExecError("Could not reboot instance")
2753
    else:
2754
      if not self.rpc.call_instance_shutdown(node_current, instance):
2755
        raise errors.OpExecError("could not shutdown instance for full reboot")
2756
      _ShutdownInstanceDisks(self, instance)
2757
      _StartInstanceDisks(self, instance, ignore_secondaries)
2758
      result = self.rpc.call_instance_start(node_current, instance, extra_args)
2759
      msg = result.RemoteFailMsg()
2760
      if msg:
2761
        _ShutdownInstanceDisks(self, instance)
2762
        raise errors.OpExecError("Could not start instance for"
2763
                                 " full reboot: %s" % msg)
2764

    
2765
    self.cfg.MarkInstanceUp(instance.name)
2766

    
2767

    
2768
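# LURebootInstance.Exec above handles soft and hard reboots with a single
# reboot RPC on the primary node, while a full reboot is emulated as
# shutdown, disk cycle and start.  The helper below is a hypothetical sketch
# of that dispatch, returning symbolic action names; it is not used by any
# LU.
def _ExampleRebootActions(reboot_type):
  """Sketch: map a reboot type to its sequence of high-level actions.

  Uses the INSTANCE_REBOOT_* constants; purely illustrative.

  """
  if reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                     constants.INSTANCE_REBOOT_HARD):
    return ["reboot"]
  elif reboot_type == constants.INSTANCE_REBOOT_FULL:
    return ["shutdown", "shutdown_disks", "start_disks", "start"]
  else:
    raise errors.ParameterError("unknown reboot type '%s'" % reboot_type)

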
class LUShutdownInstance(LogicalUnit):
2769
  """Shutdown an instance.
2770

2771
  """
2772
  HPATH = "instance-stop"
2773
  HTYPE = constants.HTYPE_INSTANCE
2774
  _OP_REQP = ["instance_name"]
2775
  REQ_BGL = False
2776

    
2777
  def ExpandNames(self):
2778
    self._ExpandAndLockInstance()
2779

    
2780
  def BuildHooksEnv(self):
2781
    """Build hooks env.
2782

2783
    This runs on master, primary and secondary nodes of the instance.
2784

2785
    """
2786
    env = _BuildInstanceHookEnvByObject(self, self.instance)
2787
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2788
    return env, nl, nl
2789

    
2790
  def CheckPrereq(self):
2791
    """Check prerequisites.
2792

2793
    This checks that the instance is in the cluster.
2794

2795
    """
2796
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2797
    assert self.instance is not None, \
2798
      "Cannot retrieve locked instance %s" % self.op.instance_name
2799
    _CheckNodeOnline(self, self.instance.primary_node)
2800

    
2801
  def Exec(self, feedback_fn):
2802
    """Shutdown the instance.
2803

2804
    """
2805
    instance = self.instance
2806
    node_current = instance.primary_node
2807
    self.cfg.MarkInstanceDown(instance.name)
2808
    result = self.rpc.call_instance_shutdown(node_current, instance)
2809
    if result.failed or not result.data:
2810
      self.proc.LogWarning("Could not shutdown instance")
2811

    
2812
    _ShutdownInstanceDisks(self, instance)
2813

    
2814

    
2815
class LUReinstallInstance(LogicalUnit):
2816
  """Reinstall an instance.
2817

2818
  """
2819
  HPATH = "instance-reinstall"
2820
  HTYPE = constants.HTYPE_INSTANCE
2821
  _OP_REQP = ["instance_name"]
2822
  REQ_BGL = False
2823

    
2824
  def ExpandNames(self):
2825
    self._ExpandAndLockInstance()
2826

    
2827
  def BuildHooksEnv(self):
2828
    """Build hooks env.
2829

2830
    This runs on master, primary and secondary nodes of the instance.
2831

2832
    """
2833
    env = _BuildInstanceHookEnvByObject(self, self.instance)
2834
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
2835
    return env, nl, nl
2836

    
2837
  def CheckPrereq(self):
2838
    """Check prerequisites.
2839

2840
    This checks that the instance is in the cluster and is not running.
2841

2842
    """
2843
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2844
    assert instance is not None, \
2845
      "Cannot retrieve locked instance %s" % self.op.instance_name
2846
    _CheckNodeOnline(self, instance.primary_node)
2847

    
2848
    if instance.disk_template == constants.DT_DISKLESS:
2849
      raise errors.OpPrereqError("Instance '%s' has no disks" %
2850
                                 self.op.instance_name)
2851
    if instance.admin_up:
2852
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2853
                                 self.op.instance_name)
2854
    remote_info = self.rpc.call_instance_info(instance.primary_node,
2855
                                              instance.name,
2856
                                              instance.hypervisor)
2857
    if remote_info.failed or remote_info.data:
2858
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2859
                                 (self.op.instance_name,
2860
                                  instance.primary_node))
2861

    
2862
    self.op.os_type = getattr(self.op, "os_type", None)
2863
    if self.op.os_type is not None:
2864
      # OS verification
2865
      pnode = self.cfg.GetNodeInfo(
2866
        self.cfg.ExpandNodeName(instance.primary_node))
2867
      if pnode is None:
2868
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
2869
                                   instance.primary_node)
2870
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
2871
      result.Raise()
2872
      if not isinstance(result.data, objects.OS):
2873
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
2874
                                   " primary node"  % self.op.os_type)
2875

    
2876
    self.instance = instance
2877

    
2878
  def Exec(self, feedback_fn):
2879
    """Reinstall the instance.
2880

2881
    """
2882
    inst = self.instance
2883

    
2884
    if self.op.os_type is not None:
2885
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
2886
      inst.os = self.op.os_type
2887
      self.cfg.Update(inst)
2888

    
2889
    _StartInstanceDisks(self, inst, None)
2890
    try:
2891
      feedback_fn("Running the instance OS create scripts...")
2892
      result = self.rpc.call_instance_os_add(inst.primary_node, inst)
2893
      msg = result.RemoteFailMsg()
2894
      if msg:
2895
        raise errors.OpExecError("Could not install OS for instance %s"
2896
                                 " on node %s: %s" %
2897
                                 (inst.name, inst.primary_node, msg))
2898
    finally:
2899
      _ShutdownInstanceDisks(self, inst)
2900

    
2901

    
2902
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise()
    if remote_info.data:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not connect to node '%s' to rename"
                                 " directory '%s' to '%s' (but the instance"
                                 " has been renamed in Ganeti)" % (
                                 inst.primary_node, old_file_storage_dir,
                                 new_file_storage_dir))

      if not result.data[0]:
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
                                 " (but the instance has been renamed in"
                                 " Ganeti)" % (old_file_storage_dir,
                                               new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.RemoteFailMsg()
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    if result.failed or not result.data:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance")
      else:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, instance.primary_node))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state", "admin_ram",
                                    "disk_template", "ip", "mac", "bridge",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    "(disk).(size)/([0-9]+)",
                                    "(disk).(sizes)", "disk_usage",
                                    "(nic).(mac|ip|bridge)/([0-9]+)",
                                    "(nic).(macs|ips|bridges)",
                                    "(disk|nic).(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


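  # Note: besides the plain field names above, the FieldSet entries written
  # as regular expressions describe parameterized query fields, handled at
  # the end of Exec below. For example:
  #   "disk.count"  -> number of disks
  #   "disk.sizes"  -> list of all disk sizes
  #   "disk.size/0" -> size of the first disk (index-based lookup)
  #   "nic.macs"    -> list of all NIC MAC addresses
  #   "nic.ip/1"    -> IP of the second NIC (None if there is no such NIC)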
  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed:
          bad_nodes.append(name)
        else:
          if result.data:
            live_data.update(result.data)
            # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    for instance in instance_list:
      iout = []
      i_hv = self.cfg.GetClusterInfo().FillHV(instance)
      i_be = self.cfg.GetClusterInfo().FillBE(instance)
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = instance.nics[0].ip
        elif field == "bridge":
          val = instance.nics[0].bridge
        elif field == "mac":
          val = instance.nics[0].mac
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "bridges":
              val = [nic.bridge for nic in instance.nics]
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "bridge":
                  val = instance.nics[nic_idx].bridge
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, "Unhandled variable parameter"
        else:
          raise errors.ParameterError(field)
        iout.append(val)
      output.append(iout)

    return output


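# The two logical units below both move an instance over to its secondary
# node: LUFailoverInstance shuts the instance down and restarts it on the
# other node, while LUMigrateInstance (further below) uses the hypervisor's
# migration support and avoids the shutdown; see their docstrings.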
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                         instance.name, bep[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    result = self.rpc.call_bridges_exist(target_node, brlist)
    result.Raise()
    if not result.data:
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    if result.failed or not result.data:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding"
                             " anyway. Please make sure node %s is down",
                             instance.name, source_node, source_node)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, source_node))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None)
      msg = result.RemoteFailMsg()
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    result = self.rpc.call_bridges_exist(target_node, brlist)
    if result.failed or not result.data:
      raise errors.OpPrereqError("One or more target bridges %s does not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

    if not self.op.cleanup:
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
                                   msg)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        msg = nres.RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Cannot resync disks on node %s: %s" %
                                   (node, msg))
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
                               " error %s" % (node, msg))

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      msg = nres.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Cannot disconnect disks node %s,"
                                 " error %s" % (node, msg))

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      msg = nres.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Cannot change disks config on node %s,"
                                 " error: %s" % (node, msg))

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise()
      if not isinstance(result.data, list):
        raise errors.OpExecError("Can't contact node '%s'" % node)

    runningon_source = instance.name in ins_l[source_node].data
    runningon_target = instance.name in ins_l[target_node].data

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.LogWarning("Migration failed and I can't reconnect the"
                      " drives: error '%s'\n"
                      "Please look and recover the instance status" %
                      str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.RemoteFailMsg()
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.RemoteFailMsg()
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.op.live)
    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

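  # Note on the shared state used by the helpers above: Exec below stores
  # the current primary as source_node, the single secondary as target_node,
  # and builds nodes_ip as a {node name: secondary IP} mapping, which the
  # DRBD-related calls in _GoStandalone, _GoReconnect and _WaitUntilSync use.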
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
    if self.op.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


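# _CreateBlockDev above walks the disk tree children-first and only delegates
# the actual creation of a device to _CreateSingleBlockDev below once
# force_create is set; the flag is turned on as soon as a device reports
# CreateOnSecondary() and is inherited by that device's children.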
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  msg = result.RemoteFailMsg()
  if msg:
    raise errors.OpExecError("Can't create block device %s on"
                             " node %s for instance %s: %s" %
                             (device, node, instance.name, msg))
  if device.physical_id is None:
    device.physical_id = result.payload


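# The helpers below build the disk objects for a new instance. Logical
# volume names are a cluster-wide unique ID plus a suffix such as ".disk0"
# (with "_data"/"_meta" appended for DRBD); the exact format of the ID
# returned by lu.cfg.GenerateUniqueID() is not relied upon here.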
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


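# A DRBD8 disk generated above is a small tree: the drbd8 device on top,
# with a data LV of the requested size and a fixed 128 MB metadata LV as
# children; its logical_id bundles both node names, the allocated network
# port, the two minors and the shared secret. _GenerateDiskTemplate below
# uses this for the constants.DT_DRBD8 template.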
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % i
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         idx)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


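# For reference, _GenerateDiskTemplate above expects disk_info as a list of
# dicts with "size" and "mode" keys (sizes are illustrative), e.g.
# [{"size": 10240, "mode": constants.DISK_RDWR}], and names the resulting
# devices "disk/<base_index>", "disk/<base_index + 1>", ... via iv_name.
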
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    if result.failed or not result.data:
      raise errors.OpExecError("Could not connect to node '%s'" % pnode)

    if not result.data[0]:
      raise errors.OpExecError("Failed to create directory '%s'" %
                               file_storage_dir)

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.failed or not result.data:
        lu.proc.LogWarning("Could not remove block device %s on node %s,"
                           " continuing anyway", device.iv_name, node)
        result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    if result.failed or not result.data:
      logging.error("Could not remove directory '%s'", file_storage_dir)
      result = False

  return result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]


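# Worked example for _ComputeDiskSize above (sizes illustrative): two DRBD8
# disks of 10240 MB each require (10240 + 128) * 2 = 20736 MB of free space
# in the volume group; the diskless and file-based templates need no LVM
# space at all, hence None.
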
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    msg = info.RemoteFailMsg()
    if msg:
      raise errors.OpPrereqError("Hypervisor parameter validation failed:"
                                 " %s" % msg)


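# Illustrative use of _CheckHVParams (the real call sites are in the
# instance create/modify LUs; "node_list" and "filled_hvp" are placeholder
# names): _CheckHVParams(self, node_list, self.op.hypervisor, filled_hvp)
# asks every listed node to validate the parameter dict and turns any remote
# failure into an OpPrereqError.
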
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)

    filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)

    # fill and remember the beparams dict
    utils.CheckBEParams(self.op.beparams)
    self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for nic in self.op.nics:
      # ip validity checks
      ip = nic.get("ip", None)
      if ip is None or ip.lower() == "none":
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      if bridge is None:
        bridge = self.cfg.GetDefBridge()
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

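    # For reference, the loops above accept NIC/disk specifications of the
    # following shape (the literal values are illustrative):
    #   nics:  [{"ip": constants.VALUE_AUTO, "mac": constants.VALUE_AUTO,
    #            "bridge": None}]   # a missing/None bridge -> cluster default
    #   disks: [{"size": 10240, "mode": constants.DISK_RDWR}]
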
    # used in CheckPrereq for ip ping check
4253
    self.check_ip = hostname1.ip
4254

    
4255
    # file storage checks
4256
    if (self.op.file_driver and
4257
        not self.op.file_driver in constants.FILE_DRIVER):
4258
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4259
                                 self.op.file_driver)
4260

    
4261
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4262
      raise errors.OpPrereqError("File storage directory path not absolute")
4263

    
4264
    ### Node/iallocator related checks
4265
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4266
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4267
                                 " node must be given")
4268

    
4269
    if self.op.iallocator:
4270
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4271
    else:
4272
      self.op.pnode = self._ExpandNode(self.op.pnode)
4273
      nodelist = [self.op.pnode]
4274
      if self.op.snode is not None:
4275
        self.op.snode = self._ExpandNode(self.op.snode)
4276
        nodelist.append(self.op.snode)
4277
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4278

    
4279
    # in case of import lock the source node too
4280
    if self.op.mode == constants.INSTANCE_IMPORT:
4281
      src_node = getattr(self.op, "src_node", None)
4282
      src_path = getattr(self.op, "src_path", None)
4283

    
4284
      if src_path is None:
4285
        self.op.src_path = src_path = self.op.instance_name
4286

    
4287
      if src_node is None:
4288
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4289
        self.op.src_node = None
4290
        if os.path.isabs(src_path):
4291
          raise errors.OpPrereqError("Importing an instance from an absolute"
4292
                                     " path requires a source node option.")
4293
      else:
4294
        self.op.src_node = src_node = self._ExpandNode(src_node)
4295
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4296
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4297
        if not os.path.isabs(src_path):
4298
          self.op.src_path = src_path = \
4299
            os.path.join(constants.EXPORT_DIR, src_path)
4300

    
4301
    else: # INSTANCE_CREATE
4302
      if getattr(self.op, "os_type", None) is None:
4303
        raise errors.OpPrereqError("No guest OS specified")
4304

    
4305
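  # The helper below relies on the standard IAllocator result contract: on
  # success, ial.nodes holds exactly ial.required_nodes node names, the
  # first being the primary and (for mirrored templates) the second the
  # secondary.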
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

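  # For orientation: BuildHooksEnv below produces the environment passed to
  # the instance-add hooks, e.g. (illustrative) INSTANCE_ADD_MODE set to
  # self.op.mode and INSTANCE_DISK_SIZE to the comma-joined disk sizes,
  # plus the common variables contributed by _BuildInstanceHookEnv.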
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
      "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
      "INSTANCE_ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["INSTANCE_SRC_NODE"] = self.op.src_node
      env["INSTANCE_SRC_PATH"] = self.op.src_path
      env["INSTANCE_SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


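  # CheckPrereq below validates, in order: the export data (for imports),
  # the IP conflict check, the optional allocator run, and finally the
  # node-level constraints (online state, free disk, hypervisor parameters,
  # OS availability, bridges and free memory).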
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")


    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        exp_list = self.rpc.call_export_list(
          self.acquired_locks[locking.LEVEL_NODE])
        found = False
        for node in exp_list:
          if not exp_list[node].failed and src_path in exp_list[node].data:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise()
      if not result.data:
        raise errors.OpPrereqError("No export found in dir %s" % src_path)

      export_info = result.data
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      self.secondaries.append(self.op.snode)
      _CheckNodeOnline(self, self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise()
        info = info.data
        if not info:
          raise errors.OpPrereqError("Cannot get current information"
                                     " from node '%s'" % node)
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > info['vg_free']:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, info['vg_free'], req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise()
    if not isinstance(result.data, objects.OS):
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
                                 " primary node"  % self.op.os_type)

    # bridge check on primary node
    bridges = [n.bridge for n in self.nics]
    result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
    result.Raise()
    if not result.data:
      raise errors.OpPrereqError("One of the target bridges '%s' does not"
                                 " exist on destination node '%s'" %
                                 (",".join(bridges), pnode.name))

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

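  # Exec below performs the actual creation: generate missing MACs,
  # allocate a network port if the hypervisor needs one, create the disks,
  # register the instance in the configuration, wait for the disks to sync,
  # run the OS create/import scripts and finally (optionally) start the
  # instance.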
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj)
        msg = result.RemoteFailMsg()
        if msg:
          raise errors.OpExecError("Could not add os for instance %s"
                                   " on node %s: %s" %
                                   (instance, pnode_name, msg))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        import_result.Raise()
        for idx, result in enumerate(import_result.data):
          if not result:
            self.LogWarning("Could not import the image %s for instance"
                            " %s, disk %d, on node %s" %
                            (src_images[idx], instance, idx, pnode_name))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise()

    if instance.name not in node_insts.data:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


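# An orientation note on the three replace-disks modes handled by the LU
# below: REPLACE_DISK_PRI and REPLACE_DISK_SEC rebuild the LVs on the
# existing primary or secondary node (handled by _ExecD8DiskOnly), while
# REPLACE_DISK_CHG moves the mirror to a new secondary node (handled by
# _ExecD8Secondary).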
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    # check for valid parameter combination
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)
      self.op.remote_node = remote_node
      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def _RunAllocator(self):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.op.instance_name,
                     relocate_from=[self.sec_node])

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.remote_node = ial.nodes[0]
    self.LogInfo("Selected new secondary for the instance: %s",
                 self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    self.instance = instance

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes))

    self.sec_node = instance.secondary_nodes[0]

    if self.op.iallocator is not None:
      self._RunAllocator()

    remote_node = self.op.remote_node
    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None
    if remote_node == instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.")
    elif remote_node == self.sec_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.")

    if self.op.mode == constants.REPLACE_DISK_PRI:
      n1 = self.tgt_node = instance.primary_node
      n2 = self.oth_node = self.sec_node
    elif self.op.mode == constants.REPLACE_DISK_SEC:
      n1 = self.tgt_node = self.sec_node
      n2 = self.oth_node = instance.primary_node
    elif self.op.mode == constants.REPLACE_DISK_CHG:
      n1 = self.new_node = remote_node
      n2 = self.oth_node = instance.primary_node
      self.tgt_node = self.sec_node
    else:
      raise errors.ProgrammerError("Unhandled disk replace mode")

    _CheckNodeOnline(self, n1)
    _CheckNodeOnline(self, n2)

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))

    for disk_idx in self.op.disks:
      instance.FindDisk(disk_idx)

  def _ExecD8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for drbd8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    tgt_node = self.tgt_node
    oth_node = self.oth_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([oth_node, tgt_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    for node in oth_node, tgt_node:
      res = results[node]
      if res.failed or not res.data or my_vg not in res.data:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      for node in tgt_node, oth_node:
        info("checking disk/%d on %s" % (idx, node))
        cfg.SetDiskID(dev, node)
        result = self.rpc.call_blockdev_find(node, dev)
        msg = result.RemoteFailMsg()
        if not msg and not result.payload:
          msg = "disk not found"
        if msg:
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, oth_node))
      if not _CheckDiskConsistency(self, dev, oth_node,
                                   oth_node==instance.primary_node):
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
                                 " to replace disks on this node (%s)" %
                                 (oth_node, tgt_node))

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      size = dev.size
      cfg.SetDiskID(dev, tgt_node)
      lv_names = [".disk%d_%s" % (idx, suf)
                  for suf in ["data", "meta"]]
      names = _GenerateUniqueNames(self, lv_names)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      info("creating new local storage on %s for %s" %
           (tgt_node, dev.iv_name))
      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

4987
    self.proc.LogStep(4, steps_total, "change drbd configuration")
4988
    for dev, old_lvs, new_lvs in iv_names.itervalues():
4989
      info("detaching %s drbd from local storage" % dev.iv_name)
4990
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
4991
      result.Raise()
4992
      if not result.data:
4993
        raise errors.OpExecError("Can't detach drbd from local storage on node"
4994
                                 " %s for device %s" % (tgt_node, dev.iv_name))
4995
      #dev.children = []
4996
      #cfg.Update(instance)
4997

    
4998
      # ok, we created the new LVs, so now we know we have the needed
4999
      # storage; as such, we proceed on the target node to rename
5000
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5001
      # using the assumption that logical_id == physical_id (which in
5002
      # turn is the unique_id on that node)
5003

    
5004
      # FIXME(iustin): use a better name for the replaced LVs
5005
      temp_suffix = int(time.time())
5006
      ren_fn = lambda d, suff: (d.physical_id[0],
5007
                                d.physical_id[1] + "_replaced-%s" % suff)
5008
      # build the rename list based on what LVs exist on the node
5009
      rlist = []
5010
      for to_ren in old_lvs:
5011
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
5012
        if not result.RemoteFailMsg() and result.payload:
5013
          # device exists
5014
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
5015

    
5016
      info("renaming the old LVs on the target node")
5017
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5018
      result.Raise()
5019
      if not result.data:
5020
        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
5021
      # now we rename the new LVs to the old LVs
5022
      info("renaming the new LVs on the target node")
5023
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
5024
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5025
      result.Raise()
5026
      if not result.data:
5027
        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
5028

    
5029
      for old, new in zip(old_lvs, new_lvs):
5030
        new.logical_id = old.logical_id
5031
        cfg.SetDiskID(new, tgt_node)
5032

    
5033
      for disk in old_lvs:
5034
        disk.logical_id = ren_fn(disk, temp_suffix)
5035
        cfg.SetDiskID(disk, tgt_node)
5036

    
5037
      # now that the new lvs have the old name, we can add them to the device
5038
      info("adding new mirror component on %s" % tgt_node)
5039
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
5040
      if result.failed or not result.data:
5041
        for new_lv in new_lvs:
5042
          result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
5043
          if result.failed or not result.data:
5044
            warning("Can't rollback device %s", hint="manually cleanup unused"
5045
                    " logical volumes")
5046
        raise errors.OpExecError("Can't add local storage to drbd")
5047

    
5048
      dev.children = new_lvs
5049
      cfg.Update(instance)
5050

    
5051
    # Step: wait for sync
5052

    
5053
    # this can fail as the old devices are degraded and _WaitForSync
5054
    # does a combined result over all disks, so we don't check its
5055
    # return value
5056
    self.proc.LogStep(5, steps_total, "sync devices")
5057
    _WaitForSync(self, instance, unlock=True)
5058

    
5059
    # so check manually all the devices
5060
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5061
      cfg.SetDiskID(dev, instance.primary_node)
5062
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
5063
      msg = result.RemoteFailMsg()
5064
      if not msg and not result.payload:
5065
        msg = "disk not found"
5066
      if msg:
5067
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5068
                                 (name, msg))
5069
      if result.payload[5]:
5070
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5071

    
5072
    # Step: remove old storage
5073
    self.proc.LogStep(6, steps_total, "removing old storage")
5074
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5075
      info("remove logical volumes for %s" % name)
5076
      for lv in old_lvs:
5077
        cfg.SetDiskID(lv, tgt_node)
5078
        result = self.rpc.call_blockdev_remove(tgt_node, lv)
5079
        if result.failed or not result.data:
5080
          warning("Can't remove old LV", hint="manually remove unused LVs")
5081
          continue
5082

    
5083
  def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node
    nodes_ip = {
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
      }

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = self.rpc.call_vg_list([pri_node, new_node])
    for node in pri_node, new_node:
      res = results[node]
      if res.failed or not res.data or my_vg not in res.data:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d on %s" % (idx, pri_node))
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                 (idx, pri_node, msg))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for idx, dev in enumerate(instance.disks):
      if idx not in self.op.disks:
        continue
      info("checking disk/%d consistency on %s" % (idx, pri_node))
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                 " unsafe to replace the secondary" %
                                 pri_node)

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for idx, dev in enumerate(instance.disks):
      info("adding new local storage on %s for disk/%d" %
           (new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self, new_node, instance, new_lv, True,
                        _GetInstanceInfoText(instance), False)

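    # Reminder (derived from the unpacking below): a DRBD8 disk's
    # logical_id is the tuple (node_a, node_b, port, minor_a, minor_b,
    # secret), so building a "standalone" variant simply means setting the
    # port to None in that tuple.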
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
                                   instance.name)
    logging.debug("Allocated minors %s" % (minors,))
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
      size = dev.size
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if pri_node == o_node1:
        p_minor = o_minor1
      else:
        p_minor = o_minor2

      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children)
      try:
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
                              _GetInstanceInfoText(instance), False)
      except errors.BlockDeviceError:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    for idx, dev in enumerate(instance.disks):
      # we have new devices, shutdown the drbd on the old secondary
      info("shutting down drbd for disk/%d on old node" % idx)
      cfg.SetDiskID(dev, old_node)
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
      if msg:
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
                (idx, msg),
                hint="Please cleanup this device manually as soon as possible")

    info("detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
                                               instance.disks)[pri_node]

    msg = result.RemoteFailMsg()
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    info("updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      cfg.SetDiskID(dev, pri_node)
    cfg.Update(instance)

    # and now perform the drbd attach
    info("attaching primary drbds to new secondary (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
                                           instance.disks, instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.RemoteFailMsg()
      if msg:
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
                hint="please do a gnt-instance info to see the"
                " status of disks")

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(self, instance, unlock=True)

    # so check manually all the devices
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
      result = self.rpc.call_blockdev_find(pri_node, dev)
      msg = result.RemoteFailMsg()
      if not msg and not result.payload:
        msg = "disk not found"
      if msg:
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
                                 (idx, msg))
      if result.payload[5]:
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)

    self.proc.LogStep(6, steps_total, "removing old storage")
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
      info("remove logical volumes for disk/%d" % idx)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        result = self.rpc.call_blockdev_remove(old_node, lv)
        if result.failed or not result.data:
          warning("Can't remove LV on old secondary",
                  hint="Cleanup stale volumes by hand")

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    instance = self.instance

    # Activate the instance disks if we're replacing them on a down instance
    if not instance.admin_up:
      _StartInstanceDisks(self, instance, True)

    if self.op.mode == constants.REPLACE_DISK_CHG:
      fn = self._ExecD8Secondary
    else:
      fn = self._ExecD8DiskOnly

    ret = fn(feedback_fn)

    # Deactivate the instance disks if we're replacing them on a down instance
    if not instance.admin_up:
      _SafeShutdownInstanceDisks(self, instance)

    return ret


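# Illustrative use of the LU below (the corresponding opcode lives in
# opcodes.py): submitting an opcode with instance_name="inst1", disk=0,
# amount=1024 and wait_for_sync=True grows the first disk by 1 GiB on all
# of the instance's nodes and then waits for the mirror to resync.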
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)


    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.")

    self.disk = instance.FindDisk(self.op.disk)

    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                       instance.hypervisor)
    for node in nodenames:
      info = nodeinfo[node]
      if info.failed or not info.data:
        raise errors.OpPrereqError("Cannot get current information"
                                   " from node '%s'" % node)
      vg_free = info.data.get('vg_free', None)
      if not isinstance(vg_free, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % node)
      if self.op.amount > vg_free:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (node, vg_free, self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Grow request failed to node %s: %s" %
                                 (node, msg))
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


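# The query LU below returns, per instance, a dict with the static
# configuration (name, pnode, snodes, os, nics, disks, hypervisor and the
# hv/be parameters) plus, unless static=True is requested, the live run
# state obtained from the primary node.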
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    static = self.op.static
    if not static:
      self.cfg.SetDiskID(dev, instance.primary_node)
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
      msg = dev_pstatus.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Can't compute disk status for %s: %s" %
                                 (instance.name, msg))
      dev_pstatus = dev_pstatus.payload
    else:
      dev_pstatus = None

    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    if snode and not static:
      self.cfg.SetDiskID(dev, snode)
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
      msg = dev_sstatus.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Can't compute disk status for %s: %s" %
                                 (instance.name, msg))
      dev_sstatus = dev_sstatus.payload
    else:
      dev_sstatus = None

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise()
        remote_info = remote_info.data
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        }

      result[instance.name] = idict

    return result


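# In the LU below self.op.nics and self.op.disks are lists of (op, params)
# pairs, where op is constants.DDM_ADD, constants.DDM_REMOVE or an integer
# index of an existing device, and params is a dict of the fields to set;
# for example (illustrative): [(constants.DDM_ADD, {"size": 2048})].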
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted")

    utils.CheckBEParams(self.op.beparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index")
      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing")
        try:
          size = int(size)
        except ValueError, err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err))
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk")

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time")

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index")

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == "none":
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
      # we can only check None bridges and assign the default one
      nic_bridge = nic_dict.get('bridge', None)
      if nic_bridge is None:
        nic_dict['bridge'] = self.cfg.GetDefBridge()
      # but we can validate MACs
      nic_mac = nic_dict.get('mac', None)
      if nic_mac is not None:
        if self.cfg.IsMacInUse(nic_mac):
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % nic_mac)
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          if not utils.IsValidMac(nic_mac):
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time")

  def ExpandNames(self):
5633
    self._ExpandAndLockInstance()
5634
    self.needed_locks[locking.LEVEL_NODE] = []
5635
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5636

    
5637
  def DeclareLocks(self, level):
5638
    if level == locking.LEVEL_NODE:
5639
      self._LockInstancesNodes()
5640

    
5641
  def BuildHooksEnv(self):
5642
    """Build hooks env.
5643

5644
    This runs on the master, primary and secondaries.
5645

5646
    """
5647
    args = dict()
5648
    if constants.BE_MEMORY in self.be_new:
5649
      args['memory'] = self.be_new[constants.BE_MEMORY]
5650
    if constants.BE_VCPUS in self.be_new:
5651
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
5652
    # FIXME: readd disk/nic changes
5653
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
5654
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5655
    return env, nl, nl
5656

    
5657
  def CheckPrereq(self):
5658
    """Check prerequisites.
5659

5660
    This only checks the instance list against the existing names.
5661

5662
    """
5663
    force = self.force = self.op.force
5664

    
5665
    # checking the new params on the primary/secondary nodes
5666

    
5667
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5668
    assert self.instance is not None, \
5669
      "Cannot retrieve locked instance %s" % self.op.instance_name
5670
    pnode = instance.primary_node
5671
    nodelist = list(instance.all_nodes)
5672

    
5673
    # hvparams processing
5674
    if self.op.hvparams:
5675
      i_hvdict = copy.deepcopy(instance.hvparams)
5676
      for key, val in self.op.hvparams.iteritems():
5677
        if val == constants.VALUE_DEFAULT:
5678
          try:
5679
            del i_hvdict[key]
5680
          except KeyError:
5681
            pass
5682
        elif val == constants.VALUE_NONE:
5683
          i_hvdict[key] = None
5684
        else:
5685
          i_hvdict[key] = val
5686
      cluster = self.cfg.GetClusterInfo()
5687
      hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
5688
                                i_hvdict)
5689
      # local check
5690
      hypervisor.GetHypervisor(
5691
        instance.hypervisor).CheckParameterSyntax(hv_new)
5692
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
5693
      self.hv_new = hv_new # the new actual values
5694
      self.hv_inst = i_hvdict # the new dict (without defaults)
5695
    else:
5696
      self.hv_new = self.hv_inst = {}
5697

    
5698
    # beparams processing
5699
    if self.op.beparams:
5700
      i_bedict = copy.deepcopy(instance.beparams)
5701
      for key, val in self.op.beparams.iteritems():
5702
        if val == constants.VALUE_DEFAULT:
5703
          try:
5704
            del i_bedict[key]
5705
          except KeyError:
5706
            pass
5707
        else:
5708
          i_bedict[key] = val
5709
      cluster = self.cfg.GetClusterInfo()
5710
      be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
5711
                                i_bedict)
5712
      self.be_new = be_new # the new actual values
5713
      self.be_inst = i_bedict # the new dict (without defaults)
5714
    else:
5715
      self.be_new = self.be_inst = {}
5716

    
5717
    self.warn = []
5718

    
5719
    if constants.BE_MEMORY in self.op.beparams and not self.force:
5720
      mem_check_list = [pnode]
5721
      if be_new[constants.BE_AUTO_BALANCE]:
5722
        # either we changed auto_balance to yes or it was from before
5723
        mem_check_list.extend(instance.secondary_nodes)
5724
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
5725
                                                  instance.hypervisor)
5726
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
5727
                                         instance.hypervisor)
5728
      if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
5729
        # Assume the primary node is unreachable and go ahead
5730
        self.warn.append("Can't get info from primary node %s" % pnode)
5731
      else:
5732
        if not instance_info.failed and instance_info.data:
5733
          current_mem = instance_info.data['memory']
5734
        else:
5735
          # Assume instance not running
5736
          # (there is a slight race condition here, but it's not very probable,
5737
          # and we have no other way to check)
5738
          current_mem = 0
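        # check whether the new memory value still fits on the primary node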
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    nodeinfo[pnode].data['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.iteritems():
          if node not in instance.secondary_nodes:
            continue
          if nres.failed or not isinstance(nres.data, dict):
            self.warn.append("Can't get info from secondary node %s" % node)
          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
      nic_bridge = nic_dict.get('bridge', None)
      if nic_bridge is not None:
        if not self.rpc.call_bridges_exist(pnode, [nic_bridge]):
          msg = ("Bridge '%s' doesn't exist on one of"
                 " the instance nodes" % nic_bridge)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        if ins_l.failed or not isinstance(ins_l.data, list):
          raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
        if instance.name in ins_l.data:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          rpc_result = self.rpc.call_blockdev_remove(node, disk)
          if rpc_result.failed or not rpc_result.data:
            self.proc.LogWarning("Could not remove disk/%d on node %s,"
                                 " continuing anyway", device_idx, node)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # add a new nic
        if 'mac' not in nic_dict:
          mac = constants.VALUE_GENERATE
        else:
          mac = nic_dict['mac']
        if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          mac = self.cfg.GenerateMAC()
        new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
                              bridge=nic_dict.get('bridge', None))
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,bridge=%s" %
                       (new_nic.mac, new_nic.ip, new_nic.bridge)))
      else:
        # change a given nic
        for key in 'mac', 'ip', 'bridge':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
            result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].failed:
        result[node] = False
      else:
        result[node] = rpcresult[node].data

    return result


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise()
      if not result.data:
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    try:
      for disk in instance.disks:
        # new_dev_name will be a snapshot of an lvm leaf of the one we passed
        new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
        if new_dev_name.failed or not new_dev_name.data:
          self.LogWarning("Could not snapshot block device %s on node %s",
                          disk.logical_id[1], src_node)
          snap_disks.append(False)
        else:
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=(vgname, new_dev_name.data),
                                 physical_id=(vgname, new_dev_name.data),
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
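      # restart the instance if we shut it down and it should be running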
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance, None)
        msg = result.RemoteFailMsg()
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        if result.failed or not result.data:
          self.LogWarning("Could not export block device %s from node %s to"
                          " node %s", dev.logical_id[1], src_node,
                          dst_node.name)
        result = self.rpc.call_blockdev_remove(src_node, dev)
        if result.failed or not result.data:
          self.LogWarning("Could not remove snapshot block device %s from node"
                          " %s", dev.logical_id[1], src_node)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    if result.failed or not result.data:
      self.LogWarning("Could not finalize export for instance %s on node %s",
                      instance.name, dst_node.name)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].failed:
          continue
        if instance.name in exportlist[node].data:
          if not self.rpc.call_export_remove(node, instance.name):
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s", instance.name, node)


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    exportlist = self.rpc.call_export_list(self.acquired_locks[
      locking.LEVEL_NODE])
    found = False
    for node in exportlist:
      if exportlist[node].failed:
        self.LogWarning("Failed to query node %s, continuing" % node)
        continue
      if instance_name in exportlist[node].data:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        if result.failed or not result.data:
          logging.error("Could not remove export for instance %s"
                        " on node %s", instance_name, node)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      if not result:
        raise errors.OpExecError("Complete failure from rpc call")
      for node, node_result in result.items():
        node_result.Raise()
        if not node_result.data:
          raise errors.OpExecError("Failure during rpc call to node %s,"
                                   " result: %s" % (node, node_result.data))


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, lu, mode, name, **kwargs):
    self.lu = lu
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": 1,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise()
        if not isinstance(nresult.data, dict):
          raise errors.OpExecError("Can't get data for node %s" % nname)
        remote_info = nresult.data
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          try:
            remote_info[attr] = int(remote_info[attr])
          except ValueError, err:
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" % (nname, attr, err))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].data:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
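            # count the memory the instance could still grow to as used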
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
                  for n in iinfo.nics]
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
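      # network-mirrored templates need both a primary and a secondary node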
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner
    data = self.in_text

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise()

    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result.data

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" % (fail, stdout+stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result