lib/cmdlib.py @ 5c0527ed

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import sha
29
import time
30
import tempfile
31
import re
32
import platform
33

    
34
from ganeti import rpc
35
from ganeti import ssh
36
from ganeti import logger
37
from ganeti import utils
38
from ganeti import errors
39
from ganeti import hypervisor
40
from ganeti import config
41
from ganeti import constants
42
from ganeti import objects
43
from ganeti import opcodes
44
from ganeti import ssconf
45
from ganeti import serializer
46

    
47

    
48
class LogicalUnit(object):
49
  """Logical Unit base class.
50

51
  Subclasses must follow these rules:
52
    - implement CheckPrereq which also fills in the opcode instance
53
      with all the fields (even if as None)
54
    - implement Exec
55
    - implement BuildHooksEnv
56
    - redefine HPATH and HTYPE
57
    - optionally redefine their run requirements:
58
        REQ_MASTER: the LU needs to run on the master node
59
        REQ_WSSTORE: the LU needs a writable SimpleStore
60

61
  Note that all commands require root permissions.
62

63
  """
64
  HPATH = None
65
  HTYPE = None
66
  _OP_REQP = []
67
  REQ_MASTER = True
68
  REQ_WSSTORE = False
69

    
70
  def __init__(self, processor, op, cfg, sstore):
71
    """Constructor for LogicalUnit.
72

73
    This needs to be overridden in derived classes in order to check op
74
    validity.
75

76
    """
77
    self.proc = processor
78
    self.op = op
79
    self.cfg = cfg
80
    self.sstore = sstore
81
    self.__ssh = None
82

    
83
    for attr_name in self._OP_REQP:
84
      attr_val = getattr(op, attr_name, None)
85
      if attr_val is None:
86
        raise errors.OpPrereqError("Required parameter '%s' missing" %
87
                                   attr_name)
88

    
89
    if not cfg.IsCluster():
90
      raise errors.OpPrereqError("Cluster not initialized yet,"
91
                                 " use 'gnt-cluster init' first.")
92
    if self.REQ_MASTER:
93
      master = sstore.GetMasterNode()
94
      if master != utils.HostInfo().name:
95
        raise errors.OpPrereqError("Commands must be run on the master"
96
                                   " node %s" % master)
97

    
98
  def __GetSSH(self):
99
    """Returns the SshRunner object
100

101
    """
102
    if not self.__ssh:
103
      self.__ssh = ssh.SshRunner(self.sstore)
104
    return self.__ssh
105

    
106
  ssh = property(fget=__GetSSH)
107

    
108
  def CheckPrereq(self):
109
    """Check prerequisites for this LU.
110

111
    This method should check that the prerequisites for the execution
112
    of this LU are fulfilled. It can do internode communication, but
113
    it should be idempotent - no cluster or system changes are
114
    allowed.
115

116
    The method should raise errors.OpPrereqError in case something is
117
    not fulfilled. Its return value is ignored.
118

119
    This method should also update all the parameters of the opcode to
120
    their canonical form; e.g. a short node name must be fully
121
    expanded after this method has successfully completed (so that
122
    hooks, logging, etc. work correctly).
123

124
    """
125
    raise NotImplementedError
126

    
127
  def Exec(self, feedback_fn):
128
    """Execute the LU.
129

130
    This method should implement the actual work. It should raise
131
    errors.OpExecError for failures that are somewhat dealt with in
132
    code, or expected.
133

134
    """
135
    raise NotImplementedError
136

    
137
  def BuildHooksEnv(self):
138
    """Build hooks environment for this LU.
139

140
    This method should return a three-element tuple consisting of: a dict
141
    containing the environment that will be used for running the
142
    specific hook for this LU, a list of node names on which the hook
143
    should run before the execution, and a list of node names on which
144
    the hook should run after the execution.
145

146
    The keys of the dict must not have 'GANETI_' prefixed as this will
147
    be handled in the hooks runner. Also note additional keys will be
148
    added by the hooks runner. If the LU doesn't define any
149
    environment, an empty dict (and not None) should be returned.
150

151
    No nodes should be returned as an empty list (and not None).
152

153
    Note that if the HPATH for a LU class is None, this function will
154
    not be called.
155

156
    """
157
    raise NotImplementedError
158

    
159
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
160
    """Notify the LU about the results of its hooks.
161

162
    This method is called every time a hooks phase is executed, and notifies
163
    the Logical Unit about the hooks' result. The LU can then use it to alter
164
    its result based on the hooks.  By default the method does nothing and the
165
    previous result is passed back unchanged but any LU can define it if it
166
    wants to use the local cluster hook-scripts somehow.
167

168
    Args:
169
      phase: the hooks phase that has just been run
170
      hook_results: the results of the multi-node hooks rpc call
171
      feedback_fn: function to send feedback back to the caller
172
      lu_result: the previous result this LU had, or None in the PRE phase.
173

174
    """
175
    return lu_result
176

    
177

    
178
class NoHooksLU(LogicalUnit):
179
  """Simple LU which runs no hooks.
180

181
  This LU is intended as a parent for other LogicalUnits which will
182
  run no hooks, in order to reduce duplicate code.
183

184
  """
185
  HPATH = None
186
  HTYPE = None
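
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a minimal LU that
# follows the contract described in the LogicalUnit docstring above. The
# class name and its behaviour are made up for the example only; how LUs are
# actually dispatched to opcodes is outside the scope of this file.
class LUExampleListNodes(NoHooksLU):
  """Example-only LU that returns the sorted list of node names.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """No prerequisites beyond the checks done in LogicalUnit.__init__."""
    pass

  def Exec(self, feedback_fn):
    """Do the actual work: a simple configuration query."""
    return utils.NiceSort(self.cfg.GetNodeList())
# ---------------------------------------------------------------------------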
187

    
188

    
189
def _GetWantedNodes(lu, nodes):
190
  """Returns list of checked and expanded node names.
191

192
  Args:
193
    nodes: List of node names (strings), or an empty list for all
194

195
  """
196
  if not isinstance(nodes, list):
197
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
198

    
199
  if nodes:
200
    wanted = []
201

    
202
    for name in nodes:
203
      node = lu.cfg.ExpandNodeName(name)
204
      if node is None:
205
        raise errors.OpPrereqError("No such node name '%s'" % name)
206
      wanted.append(node)
207

    
208
  else:
209
    wanted = lu.cfg.GetNodeList()
210
  return utils.NiceSort(wanted)
211

    
212

    
213
def _GetWantedInstances(lu, instances):
214
  """Returns list of checked and expanded instance names.
215

216
  Args:
217
    instances: List of instance names (strings), or an empty list for all
218

219
  """
220
  if not isinstance(instances, list):
221
    raise errors.OpPrereqError("Invalid argument type 'instances'")
222

    
223
  if instances:
224
    wanted = []
225

    
226
    for name in instances:
227
      instance = lu.cfg.ExpandInstanceName(name)
228
      if instance is None:
229
        raise errors.OpPrereqError("No such instance name '%s'" % name)
230
      wanted.append(instance)
231

    
232
  else:
233
    wanted = lu.cfg.GetInstanceList()
234
  return utils.NiceSort(wanted)
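
# Illustrative note (not part of the original module): callers usually pass
# the opcode's name list straight through, e.g.
#   self.wanted = _GetWantedNodes(self, self.op.names)
# as done in LUQueryNodes.CheckPrereq below; an empty list selects everything.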
235

    
236

    
237
def _CheckOutputFields(static, dynamic, selected):
238
  """Checks whether all selected fields are valid.
239

240
  Args:
241
    static: Static fields
242
    dynamic: Dynamic fields
243

244
  """
245
  static_fields = frozenset(static)
246
  dynamic_fields = frozenset(dynamic)
247

    
248
  all_fields = static_fields | dynamic_fields
249

    
250
  if not all_fields.issuperset(selected):
251
    raise errors.OpPrereqError("Unknown output fields selected: %s"
252
                               % ",".join(frozenset(selected).
253
                                          difference(all_fields)))
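
# Illustrative note (not part of the original module): the field names below
# are made up. A call such as
#   _CheckOutputFields(static=["name"], dynamic=["mfree"],
#                      selected=["name", "bogus"])
# raises errors.OpPrereqError naming the unknown field "bogus", while any
# selection drawn only from the static and dynamic sets passes silently.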
254

    
255

    
256
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
257
                          memory, vcpus, nics):
258
  """Builds instance related env variables for hooks from single variables.
259

260
  Args:
261
    secondary_nodes: List of secondary nodes as strings
262
  """
263
  env = {
264
    "OP_TARGET": name,
265
    "INSTANCE_NAME": name,
266
    "INSTANCE_PRIMARY": primary_node,
267
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
268
    "INSTANCE_OS_TYPE": os_type,
269
    "INSTANCE_STATUS": status,
270
    "INSTANCE_MEMORY": memory,
271
    "INSTANCE_VCPUS": vcpus,
272
  }
273

    
274
  if nics:
275
    nic_count = len(nics)
276
    for idx, (ip, bridge, mac) in enumerate(nics):
277
      if ip is None:
278
        ip = ""
279
      env["INSTANCE_NIC%d_IP" % idx] = ip
280
      env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
281
      env["INSTANCE_NIC%d_HWADDR" % idx] = mac
282
  else:
283
    nic_count = 0
284

    
285
  env["INSTANCE_NIC_COUNT"] = nic_count
286

    
287
  return env
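
# Illustrative sketch (not part of the original module): for a hypothetical
# instance "inst1.example.com" with one NIC, _BuildInstanceHookEnv would
# return something like
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_OS_TYPE": "debian-etch",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 512,
#     "INSTANCE_VCPUS": 1,
#     "INSTANCE_NIC0_IP": "",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_NIC0_HWADDR": "aa:00:00:00:00:01",
#     "INSTANCE_NIC_COUNT": 1,
#   }
# (all values above are invented); per the LogicalUnit.BuildHooksEnv
# docstring, the hooks runner takes care of the "GANETI_" key prefixing.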
288

    
289

    
290
def _BuildInstanceHookEnvByObject(instance, override=None):
291
  """Builds instance related env variables for hooks from an object.
292

293
  Args:
294
    instance: objects.Instance object of instance
295
    override: dict of values to override
296
  """
297
  args = {
298
    'name': instance.name,
299
    'primary_node': instance.primary_node,
300
    'secondary_nodes': instance.secondary_nodes,
301
    'os_type': instance.os,
302
    'status': instance.status,
303
    'memory': instance.memory,
304
    'vcpus': instance.vcpus,
305
    'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
306
  }
307
  if override:
308
    args.update(override)
309
  return _BuildInstanceHookEnv(**args)
310

    
311

    
312
def _CheckInstanceBridgesExist(instance):
313
  """Check that the bridges needed by an instance exist.
314

315
  """
316
  # check bridges existence
317
  brlist = [nic.bridge for nic in instance.nics]
318
  if not rpc.call_bridges_exist(instance.primary_node, brlist):
319
    raise errors.OpPrereqError("one or more target bridges %s does not"
320
                               " exist on destination node '%s'" %
321
                               (brlist, instance.primary_node))
322

    
323

    
324
class LUDestroyCluster(NoHooksLU):
325
  """Logical unit for destroying the cluster.
326

327
  """
328
  _OP_REQP = []
329

    
330
  def CheckPrereq(self):
331
    """Check prerequisites.
332

333
    This checks whether the cluster is empty.
334

335
    Any errors are signalled by raising errors.OpPrereqError.
336

337
    """
338
    master = self.sstore.GetMasterNode()
339

    
340
    nodelist = self.cfg.GetNodeList()
341
    if len(nodelist) != 1 or nodelist[0] != master:
342
      raise errors.OpPrereqError("There are still %d node(s) in"
343
                                 " this cluster." % (len(nodelist) - 1))
344
    instancelist = self.cfg.GetInstanceList()
345
    if instancelist:
346
      raise errors.OpPrereqError("There are still %d instance(s) in"
347
                                 " this cluster." % len(instancelist))
348

    
349
  def Exec(self, feedback_fn):
350
    """Destroys the cluster.
351

352
    """
353
    master = self.sstore.GetMasterNode()
354
    if not rpc.call_node_stop_master(master):
355
      raise errors.OpExecError("Could not disable the master role")
356
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
357
    utils.CreateBackup(priv_key)
358
    utils.CreateBackup(pub_key)
359
    rpc.call_node_leave_cluster(master)
360

    
361

    
362
class LUVerifyCluster(LogicalUnit):
363
  """Verifies the cluster status.
364

365
  """
366
  HPATH = "cluster-verify"
367
  HTYPE = constants.HTYPE_CLUSTER
368
  _OP_REQP = ["skip_checks"]
369

    
370
  def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
371
                  remote_version, feedback_fn):
372
    """Run multiple tests against a node.
373

374
    Test list:
375
      - compares ganeti version
376
      - checks vg existence and size > 20G
377
      - checks config file checksum
378
      - checks ssh to other nodes
379

380
    Args:
381
      node: name of the node to check
382
      file_list: required list of files
383
      local_cksum: dictionary of local files and their checksums
384

385
    """
386
    # compares ganeti version
387
    local_version = constants.PROTOCOL_VERSION
388
    if not remote_version:
389
      feedback_fn("  - ERROR: connection to %s failed" % (node))
390
      return True
391

    
392
    if local_version != remote_version:
393
      feedback_fn("  - ERROR: sw version mismatch: master %s, node(%s) %s" %
394
                      (local_version, node, remote_version))
395
      return True
396

    
397
    # checks vg existence and size > 20G
398

    
399
    bad = False
400
    if not vglist:
401
      feedback_fn("  - ERROR: unable to check volume groups on node %s." %
402
                      (node,))
403
      bad = True
404
    else:
405
      vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
406
                                            constants.MIN_VG_SIZE)
407
      if vgstatus:
408
        feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
409
        bad = True
410

    
411
    # checks config file checksum
412
    # checks ssh to any
413

    
414
    if 'filelist' not in node_result:
415
      bad = True
416
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
417
    else:
418
      remote_cksum = node_result['filelist']
419
      for file_name in file_list:
420
        if file_name not in remote_cksum:
421
          bad = True
422
          feedback_fn("  - ERROR: file '%s' missing" % file_name)
423
        elif remote_cksum[file_name] != local_cksum[file_name]:
424
          bad = True
425
          feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
426

    
427
    if 'nodelist' not in node_result:
428
      bad = True
429
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
430
    else:
431
      if node_result['nodelist']:
432
        bad = True
433
        for node in node_result['nodelist']:
434
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
435
                          (node, node_result['nodelist'][node]))
436
    if 'node-net-test' not in node_result:
437
      bad = True
438
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
439
    else:
440
      if node_result['node-net-test']:
441
        bad = True
442
        nlist = utils.NiceSort(node_result['node-net-test'].keys())
443
        for node in nlist:
444
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
445
                          (node, node_result['node-net-test'][node]))
446

    
447
    hyp_result = node_result.get('hypervisor', None)
448
    if hyp_result is not None:
449
      feedback_fn("  - ERROR: hypervisor verify failure: '%s'" % hyp_result)
450
    return bad
451

    
452
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
453
                      node_instance, feedback_fn):
454
    """Verify an instance.
455

456
    This function checks to see if the required block devices are
457
    available on the instance's node.
458

459
    """
460
    bad = False
461

    
462
    node_current = instanceconfig.primary_node
463

    
464
    node_vol_should = {}
465
    instanceconfig.MapLVsByNode(node_vol_should)
466

    
467
    for node in node_vol_should:
468
      for volume in node_vol_should[node]:
469
        if node not in node_vol_is or volume not in node_vol_is[node]:
470
          feedback_fn("  - ERROR: volume %s missing on node %s" %
471
                          (volume, node))
472
          bad = True
473

    
474
    if instanceconfig.status != 'down':
475
      if (node_current not in node_instance or
476
          not instance in node_instance[node_current]):
477
        feedback_fn("  - ERROR: instance %s not running on node %s" %
478
                        (instance, node_current))
479
        bad = True
480

    
481
    for node in node_instance:
482
      if node != node_current:
483
        if instance in node_instance[node]:
484
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
485
                          (instance, node))
486
          bad = True
487

    
488
    return bad
489

    
490
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
491
    """Verify if there are any unknown volumes in the cluster.
492

493
    The .os, .swap and backup volumes are ignored. All other volumes are
494
    reported as unknown.
495

496
    """
497
    bad = False
498

    
499
    for node in node_vol_is:
500
      for volume in node_vol_is[node]:
501
        if node not in node_vol_should or volume not in node_vol_should[node]:
502
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
503
                      (volume, node))
504
          bad = True
505
    return bad
506

    
507
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
508
    """Verify the list of running instances.
509

510
    This checks what instances are running but unknown to the cluster.
511

512
    """
513
    bad = False
514
    for node in node_instance:
515
      for runninginstance in node_instance[node]:
516
        if runninginstance not in instancelist:
517
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
518
                          (runninginstance, node))
519
          bad = True
520
    return bad
521

    
522
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
523
    """Verify N+1 Memory Resilience.
524

525
    Check that if one single node dies we can still start all the instances it
526
    was primary for.
527

528
    """
529
    bad = False
530

    
531
    for node, nodeinfo in node_info.iteritems():
532
      # This code checks that every node which is now listed as secondary has
533
      # enough memory to host all the instances it would have to take over
534
      # should a single other node in the cluster fail.
535
      # FIXME: not ready for failover to an arbitrary node
536
      # FIXME: does not support file-backed instances
537
      # WARNING: we currently take into account down instances as well as up
538
      # ones, considering that even if they're down someone might want to start
539
      # them even in the event of a node failure.
540
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
541
        needed_mem = 0
542
        for instance in instances:
543
          needed_mem += instance_cfg[instance].memory
544
        if nodeinfo['mfree'] < needed_mem:
545
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
546
                      " failovers should node %s fail" % (node, prinode))
547
          bad = True
548
    return bad
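
  # Illustrative note (not part of the original module): with the grouping
  # above, if node C is secondary for two instances whose primary is node A
  # (hypothetical memory sizes 512 and 1024), then C must have at least
  # 512 + 1024 = 1536 free memory (as reported in 'mfree') to absorb a
  # failure of node A; otherwise node C is flagged as not N+1 compliant.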
549

    
550
  def CheckPrereq(self):
551
    """Check prerequisites.
552

553
    Transform the list of checks we're going to skip into a set and check that
554
    all its members are valid.
555

556
    """
557
    self.skip_set = frozenset(self.op.skip_checks)
558
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
559
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
560

    
561
  def BuildHooksEnv(self):
562
    """Build hooks env.
563

564
    Cluster-Verify hooks are run only in the post phase; their failure makes
565
    the output be logged in the verify output and the verification fail.
566

567
    """
568
    all_nodes = self.cfg.GetNodeList()
569
    # TODO: populate the environment with useful information for verify hooks
570
    env = {}
571
    return env, [], all_nodes
572

    
573
  def Exec(self, feedback_fn):
574
    """Verify integrity of cluster, performing various tests on nodes.
575

576
    """
577
    bad = False
578
    feedback_fn("* Verifying global settings")
579
    for msg in self.cfg.VerifyConfig():
580
      feedback_fn("  - ERROR: %s" % msg)
581

    
582
    vg_name = self.cfg.GetVGName()
583
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
584
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
585
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
586
    i_non_redundant = [] # Non redundant instances
587
    node_volume = {}
588
    node_instance = {}
589
    node_info = {}
590
    instance_cfg = {}
591

    
592
    # FIXME: verify OS list
593
    # do local checksums
594
    file_names = list(self.sstore.GetFileList())
595
    file_names.append(constants.SSL_CERT_FILE)
596
    file_names.append(constants.CLUSTER_CONF_FILE)
597
    local_checksums = utils.FingerprintFiles(file_names)
598

    
599
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
600
    all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
601
    all_instanceinfo = rpc.call_instance_list(nodelist)
602
    all_vglist = rpc.call_vg_list(nodelist)
603
    node_verify_param = {
604
      'filelist': file_names,
605
      'nodelist': nodelist,
606
      'hypervisor': None,
607
      'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
608
                        for node in nodeinfo]
609
      }
610
    all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
611
    all_rversion = rpc.call_version(nodelist)
612
    all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
613

    
614
    for node in nodelist:
615
      feedback_fn("* Verifying node %s" % node)
616
      result = self._VerifyNode(node, file_names, local_checksums,
617
                                all_vglist[node], all_nvinfo[node],
618
                                all_rversion[node], feedback_fn)
619
      bad = bad or result
620

    
621
      # node_volume
622
      volumeinfo = all_volumeinfo[node]
623

    
624
      if isinstance(volumeinfo, basestring):
625
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
626
                    (node, volumeinfo[-400:].encode('string_escape')))
627
        bad = True
628
        node_volume[node] = {}
629
      elif not isinstance(volumeinfo, dict):
630
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
631
        bad = True
632
        continue
633
      else:
634
        node_volume[node] = volumeinfo
635

    
636
      # node_instance
637
      nodeinstance = all_instanceinfo[node]
638
      if type(nodeinstance) != list:
639
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
640
        bad = True
641
        continue
642

    
643
      node_instance[node] = nodeinstance
644

    
645
      # node_info
646
      nodeinfo = all_ninfo[node]
647
      if not isinstance(nodeinfo, dict):
648
        feedback_fn("  - ERROR: connection to %s failed" % (node,))
649
        bad = True
650
        continue
651

    
652
      try:
653
        node_info[node] = {
654
          "mfree": int(nodeinfo['memory_free']),
655
          "dfree": int(nodeinfo['vg_free']),
656
          "pinst": [],
657
          "sinst": [],
658
          # dictionary holding all instances this node is secondary for,
659
          # grouped by their primary node. Each key is a cluster node, and each
660
          # value is a list of instances which have the key as primary and the
661
          # current node as secondary.  this is handy to calculate N+1 memory
662
          # availability if you can only failover from a primary to its
663
          # secondary.
664
          "sinst-by-pnode": {},
665
        }
666
      except ValueError:
667
        feedback_fn("  - ERROR: invalid value returned from node %s" % (node,))
668
        bad = True
669
        continue
670

    
671
    node_vol_should = {}
672

    
673
    for instance in instancelist:
674
      feedback_fn("* Verifying instance %s" % instance)
675
      inst_config = self.cfg.GetInstanceInfo(instance)
676
      result = self._VerifyInstance(instance, inst_config, node_volume,
677
                                     node_instance, feedback_fn)
678
      bad = bad or result
679

    
680
      inst_config.MapLVsByNode(node_vol_should)
681

    
682
      instance_cfg[instance] = inst_config
683

    
684
      pnode = inst_config.primary_node
685
      if pnode in node_info:
686
        node_info[pnode]['pinst'].append(instance)
687
      else:
688
        feedback_fn("  - ERROR: instance %s, connection to primary node"
689
                    " %s failed" % (instance, pnode))
690
        bad = True
691

    
692
      # If the instance is non-redundant we cannot survive losing its primary
693
      # node, so we are not N+1 compliant. On the other hand we have no disk
694
      # templates with more than one secondary so that situation is not well
695
      # supported either.
696
      # FIXME: does not support file-backed instances
697
      if len(inst_config.secondary_nodes) == 0:
698
        i_non_redundant.append(instance)
699
      elif len(inst_config.secondary_nodes) > 1:
700
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
701
                    % instance)
702

    
703
      for snode in inst_config.secondary_nodes:
704
        if snode in node_info:
705
          node_info[snode]['sinst'].append(instance)
706
          if pnode not in node_info[snode]['sinst-by-pnode']:
707
            node_info[snode]['sinst-by-pnode'][pnode] = []
708
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
709
        else:
710
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
711
                      " %s failed" % (instance, snode))
712

    
713
    feedback_fn("* Verifying orphan volumes")
714
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
715
                                       feedback_fn)
716
    bad = bad or result
717

    
718
    feedback_fn("* Verifying remaining instances")
719
    result = self._VerifyOrphanInstances(instancelist, node_instance,
720
                                         feedback_fn)
721
    bad = bad or result
722

    
723
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
724
      feedback_fn("* Verifying N+1 Memory redundancy")
725
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
726
      bad = bad or result
727

    
728
    feedback_fn("* Other Notes")
729
    if i_non_redundant:
730
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
731
                  % len(i_non_redundant))
732

    
733
    return int(bad)
734

    
735
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
736
    """Analyze the post-hooks' result, handle it, and send some
737
    nicely-formatted feedback back to the user.
738

739
    Args:
740
      phase: the hooks phase that has just been run
741
      hooks_results: the results of the multi-node hooks rpc call
742
      feedback_fn: function to send feedback back to the caller
743
      lu_result: previous Exec result
744

745
    """
746
    # We only really run POST phase hooks, and are only interested in their results
747
    if phase == constants.HOOKS_PHASE_POST:
748
      # Used to change hooks' output to proper indentation
749
      indent_re = re.compile('^', re.M)
750
      feedback_fn("* Hooks Results")
751
      if not hooks_results:
752
        feedback_fn("  - ERROR: general communication failure")
753
        lu_result = 1
754
      else:
755
        for node_name in hooks_results:
756
          show_node_header = True
757
          res = hooks_results[node_name]
758
          if res is False or not isinstance(res, list):
759
            feedback_fn("    Communication failure")
760
            lu_result = 1
761
            continue
762
          for script, hkr, output in res:
763
            if hkr == constants.HKR_FAIL:
764
              # The node header is only shown once, if there are
765
              # failing hooks on that node
766
              if show_node_header:
767
                feedback_fn("  Node %s:" % node_name)
768
                show_node_header = False
769
              feedback_fn("    ERROR: Script %s failed, output:" % script)
770
              output = indent_re.sub('      ', output)
771
              feedback_fn("%s" % output)
772
              lu_result = 1
773

    
774
      return lu_result
775

    
776

    
777
class LUVerifyDisks(NoHooksLU):
778
  """Verifies the cluster disks status.
779

780
  """
781
  _OP_REQP = []
782

    
783
  def CheckPrereq(self):
784
    """Check prerequisites.
785

786
    This has no prerequisites.
787

788
    """
789
    pass
790

    
791
  def Exec(self, feedback_fn):
792
    """Verify integrity of cluster disks.
793

794
    """
795
    result = res_nodes, res_nlvm, res_instances, res_missing = [], {}, [], {}
796

    
797
    vg_name = self.cfg.GetVGName()
798
    nodes = utils.NiceSort(self.cfg.GetNodeList())
799
    instances = [self.cfg.GetInstanceInfo(name)
800
                 for name in self.cfg.GetInstanceList()]
801

    
802
    nv_dict = {}
803
    for inst in instances:
804
      inst_lvs = {}
805
      if (inst.status != "up" or
806
          inst.disk_template not in constants.DTS_NET_MIRROR):
807
        continue
808
      inst.MapLVsByNode(inst_lvs)
809
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
810
      for node, vol_list in inst_lvs.iteritems():
811
        for vol in vol_list:
812
          nv_dict[(node, vol)] = inst
813

    
814
    if not nv_dict:
815
      return result
816

    
817
    node_lvs = rpc.call_volume_list(nodes, vg_name)
818

    
819
    to_act = set()
820
    for node in nodes:
821
      # node_volume
822
      lvs = node_lvs[node]
823

    
824
      if isinstance(lvs, basestring):
825
        logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
826
        res_nlvm[node] = lvs
827
      elif not isinstance(lvs, dict):
828
        logger.Info("connection to node %s failed or invalid data returned" %
829
                    (node,))
830
        res_nodes.append(node)
831
        continue
832

    
833
      for lv_name, (_, lv_inactive, lv_online) in lvs.iteritems():
834
        inst = nv_dict.pop((node, lv_name), None)
835
        if (not lv_online and inst is not None
836
            and inst.name not in res_instances):
837
          res_instances.append(inst.name)
838

    
839
    # any leftover items in nv_dict are missing LVs, let's arrange the
840
    # data better
841
    for key, inst in nv_dict.iteritems():
842
      if inst.name not in res_missing:
843
        res_missing[inst.name] = []
844
      res_missing[inst.name].append(key)
845

    
846
    return result
847

    
848

    
849
class LURenameCluster(LogicalUnit):
850
  """Rename the cluster.
851

852
  """
853
  HPATH = "cluster-rename"
854
  HTYPE = constants.HTYPE_CLUSTER
855
  _OP_REQP = ["name"]
856
  REQ_WSSTORE = True
857

    
858
  def BuildHooksEnv(self):
859
    """Build hooks env.
860

861
    """
862
    env = {
863
      "OP_TARGET": self.sstore.GetClusterName(),
864
      "NEW_NAME": self.op.name,
865
      }
866
    mn = self.sstore.GetMasterNode()
867
    return env, [mn], [mn]
868

    
869
  def CheckPrereq(self):
870
    """Verify that the passed name is a valid one.
871

872
    """
873
    hostname = utils.HostInfo(self.op.name)
874

    
875
    new_name = hostname.name
876
    self.ip = new_ip = hostname.ip
877
    old_name = self.sstore.GetClusterName()
878
    old_ip = self.sstore.GetMasterIP()
879
    if new_name == old_name and new_ip == old_ip:
880
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
881
                                 " cluster has changed")
882
    if new_ip != old_ip:
883
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
884
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
885
                                   " reachable on the network. Aborting." %
886
                                   new_ip)
887

    
888
    self.op.name = new_name
889

    
890
  def Exec(self, feedback_fn):
891
    """Rename the cluster.
892

893
    """
894
    clustername = self.op.name
895
    ip = self.ip
896
    ss = self.sstore
897

    
898
    # shutdown the master IP
899
    master = ss.GetMasterNode()
900
    if not rpc.call_node_stop_master(master):
901
      raise errors.OpExecError("Could not disable the master role")
902

    
903
    try:
904
      # modify the sstore
905
      ss.SetKey(ss.SS_MASTER_IP, ip)
906
      ss.SetKey(ss.SS_CLUSTER_NAME, clustername)
907

    
908
      # Distribute updated ss config to all nodes
909
      myself = self.cfg.GetNodeInfo(master)
910
      dist_nodes = self.cfg.GetNodeList()
911
      if myself.name in dist_nodes:
912
        dist_nodes.remove(myself.name)
913

    
914
      logger.Debug("Copying updated ssconf data to all nodes")
915
      for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
916
        fname = ss.KeyToFilename(keyname)
917
        result = rpc.call_upload_file(dist_nodes, fname)
918
        for to_node in dist_nodes:
919
          if not result[to_node]:
920
            logger.Error("copy of file %s to node %s failed" %
921
                         (fname, to_node))
922
    finally:
923
      if not rpc.call_node_start_master(master):
924
        logger.Error("Could not re-enable the master role on the master,"
925
                     " please restart manually.")
926

    
927

    
928
def _RecursiveCheckIfLVMBased(disk):
929
  """Check if the given disk or its children are lvm-based.
930

931
  Args:
932
    disk: ganeti.objects.Disk object
933

934
  Returns:
935
    boolean indicating whether a LD_LV dev_type was found or not
936

937
  """
938
  if disk.children:
939
    for chdisk in disk.children:
940
      if _RecursiveCheckIfLVMBased(chdisk):
941
        return True
942
  return disk.dev_type == constants.LD_LV
943

    
944

    
945
class LUSetClusterParams(LogicalUnit):
946
  """Change the parameters of the cluster.
947

948
  """
949
  HPATH = "cluster-modify"
950
  HTYPE = constants.HTYPE_CLUSTER
951
  _OP_REQP = []
952

    
953
  def BuildHooksEnv(self):
954
    """Build hooks env.
955

956
    """
957
    env = {
958
      "OP_TARGET": self.sstore.GetClusterName(),
959
      "NEW_VG_NAME": self.op.vg_name,
960
      }
961
    mn = self.sstore.GetMasterNode()
962
    return env, [mn], [mn]
963

    
964
  def CheckPrereq(self):
965
    """Check prerequisites.
966

967
    This checks whether the given params don't conflict and
968
    if the given volume group is valid.
969

970
    """
971
    if not self.op.vg_name:
972
      instances = [self.cfg.GetInstanceInfo(name)
973
                   for name in self.cfg.GetInstanceList()]
974
      for inst in instances:
975
        for disk in inst.disks:
976
          if _RecursiveCheckIfLVMBased(disk):
977
            raise errors.OpPrereqError("Cannot disable lvm storage while"
978
                                       " lvm-based instances exist")
979

    
980
    # if vg_name not None, checks given volume group on all nodes
981
    if self.op.vg_name:
982
      node_list = self.cfg.GetNodeList()
983
      vglist = rpc.call_vg_list(node_list)
984
      for node in node_list:
985
        vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
986
                                              constants.MIN_VG_SIZE)
987
        if vgstatus:
988
          raise errors.OpPrereqError("Error on node '%s': %s" %
989
                                     (node, vgstatus))
990

    
991
  def Exec(self, feedback_fn):
992
    """Change the parameters of the cluster.
993

994
    """
995
    if self.op.vg_name != self.cfg.GetVGName():
996
      self.cfg.SetVGName(self.op.vg_name)
997
    else:
998
      feedback_fn("Cluster LVM configuration already in desired"
999
                  " state, not changing")
1000

    
1001

    
1002
def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
1003
  """Sleep and poll for an instance's disk to sync.
1004

1005
  """
1006
  if not instance.disks:
1007
    return True
1008

    
1009
  if not oneshot:
1010
    proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1011

    
1012
  node = instance.primary_node
1013

    
1014
  for dev in instance.disks:
1015
    cfgw.SetDiskID(dev, node)
1016

    
1017
  retries = 0
1018
  while True:
1019
    max_time = 0
1020
    done = True
1021
    cumul_degraded = False
1022
    rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1023
    if not rstats:
1024
      proc.LogWarning("Can't get any data from node %s" % node)
1025
      retries += 1
1026
      if retries >= 10:
1027
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1028
                                 " aborting." % node)
1029
      time.sleep(6)
1030
      continue
1031
    retries = 0
1032
    for i in range(len(rstats)):
1033
      mstat = rstats[i]
1034
      if mstat is None:
1035
        proc.LogWarning("Can't compute data for node %s/%s" %
1036
                        (node, instance.disks[i].iv_name))
1037
        continue
1038
      # we ignore the ldisk parameter
1039
      perc_done, est_time, is_degraded, _ = mstat
1040
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1041
      if perc_done is not None:
1042
        done = False
1043
        if est_time is not None:
1044
          rem_time = "%d estimated seconds remaining" % est_time
1045
          max_time = est_time
1046
        else:
1047
          rem_time = "no time estimate"
1048
        proc.LogInfo("- device %s: %5.2f%% done, %s" %
1049
                     (instance.disks[i].iv_name, perc_done, rem_time))
1050
    if done or oneshot:
1051
      break
1052

    
1053
    if unlock:
1054
      #utils.Unlock('cmd')
1055
      pass
1056
    try:
1057
      time.sleep(min(60, max_time))
1058
    finally:
1059
      if unlock:
1060
        #utils.Lock('cmd')
1061
        pass
1062

    
1063
  if done:
1064
    proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1065
  return not cumul_degraded
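
# Illustrative note (not part of the original module): each mstat entry above
# is unpacked as (perc_done, est_time, is_degraded, ldisk); a hypothetical
# value of (93.50, 120, True, False) would be reported as 93.50% done with
# 120 estimated seconds remaining, and the loop keeps polling until perc_done
# is None for every disk (or oneshot is set).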
1066

    
1067

    
1068
def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
1069
  """Check that mirrors are not degraded.
1070

1071
  The ldisk parameter, if True, will change the test from the
1072
  is_degraded attribute (which represents overall non-ok status for
1073
  the device(s)) to the ldisk (representing the local storage status).
1074

1075
  """
1076
  cfgw.SetDiskID(dev, node)
1077
  if ldisk:
1078
    idx = 6
1079
  else:
1080
    idx = 5
1081

    
1082
  result = True
1083
  if on_primary or dev.AssembleOnSecondary():
1084
    rstats = rpc.call_blockdev_find(node, dev)
1085
    if not rstats:
1086
      logger.ToStderr("Node %s: Disk degraded, not found or node down" % node)
1087
      result = False
1088
    else:
1089
      result = result and (not rstats[idx])
1090
  if dev.children:
1091
    for child in dev.children:
1092
      result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)
1093

    
1094
  return result
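
# Illustrative note (not part of the original module): the idx chosen above
# (5 without ldisk, 6 with ldisk) selects the corresponding field of the
# status tuple returned by call_blockdev_find, matching the is_degraded
# versus local-storage behaviour described in the docstring.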
1095

    
1096

    
1097
class LUDiagnoseOS(NoHooksLU):
1098
  """Logical unit for OS diagnose/query.
1099

1100
  """
1101
  _OP_REQP = ["output_fields", "names"]
1102

    
1103
  def CheckPrereq(self):
1104
    """Check prerequisites.
1105

1106
    This always succeeds, since this is a pure query LU.
1107

1108
    """
1109
    if self.op.names:
1110
      raise errors.OpPrereqError("Selective OS query not supported")
1111

    
1112
    self.dynamic_fields = frozenset(["name", "valid", "node_status"])
1113
    _CheckOutputFields(static=[],
1114
                       dynamic=self.dynamic_fields,
1115
                       selected=self.op.output_fields)
1116

    
1117
  @staticmethod
1118
  def _DiagnoseByOS(node_list, rlist):
1119
    """Remaps a per-node return list into a per-os per-node dictionary
1120

1121
      Args:
1122
        node_list: a list with the names of all nodes
1123
        rlist: a map with node names as keys and OS objects as values
1124

1125
      Returns:
1126
        map: a map with osnames as keys and as value another map, with
1127
             nodes as
1128
             keys and list of OS objects as values
1129
             e.g. {"debian-etch": {"node1": [<object>,...],
1130
                                   "node2": [<object>,]}
1131
                  }
1132

1133
    """
1134
    all_os = {}
1135
    for node_name, nr in rlist.iteritems():
1136
      if not nr:
1137
        continue
1138
      for os_obj in nr:
1139
        if os_obj.name not in all_os:
1140
          # build a list of nodes for this os containing empty lists
1141
          # for each node in node_list
1142
          all_os[os_obj.name] = {}
1143
          for nname in node_list:
1144
            all_os[os_obj.name][nname] = []
1145
        all_os[os_obj.name][node_name].append(os_obj)
1146
    return all_os
1147

    
1148
  def Exec(self, feedback_fn):
1149
    """Compute the list of OSes.
1150

1151
    """
1152
    node_list = self.cfg.GetNodeList()
1153
    node_data = rpc.call_os_diagnose(node_list)
1154
    if node_data == False:
1155
      raise errors.OpExecError("Can't gather the list of OSes")
1156
    pol = self._DiagnoseByOS(node_list, node_data)
1157
    output = []
1158
    for os_name, os_data in pol.iteritems():
1159
      row = []
1160
      for field in self.op.output_fields:
1161
        if field == "name":
1162
          val = os_name
1163
        elif field == "valid":
1164
          val = utils.all([osl and osl[0] for osl in os_data.values()])
1165
        elif field == "node_status":
1166
          val = {}
1167
          for node_name, nos_list in os_data.iteritems():
1168
            val[node_name] = [(v.status, v.path) for v in nos_list]
1169
        else:
1170
          raise errors.ParameterError(field)
1171
        row.append(val)
1172
      output.append(row)
1173

    
1174
    return output
1175

    
1176

    
1177
class LURemoveNode(LogicalUnit):
1178
  """Logical unit for removing a node.
1179

1180
  """
1181
  HPATH = "node-remove"
1182
  HTYPE = constants.HTYPE_NODE
1183
  _OP_REQP = ["node_name"]
1184

    
1185
  def BuildHooksEnv(self):
1186
    """Build hooks env.
1187

1188
    This doesn't run on the target node in the pre phase as a failed
1189
    node would then be impossible to remove.
1190

1191
    """
1192
    env = {
1193
      "OP_TARGET": self.op.node_name,
1194
      "NODE_NAME": self.op.node_name,
1195
      }
1196
    all_nodes = self.cfg.GetNodeList()
1197
    all_nodes.remove(self.op.node_name)
1198
    return env, all_nodes, all_nodes
1199

    
1200
  def CheckPrereq(self):
1201
    """Check prerequisites.
1202

1203
    This checks:
1204
     - the node exists in the configuration
1205
     - it does not have primary or secondary instances
1206
     - it's not the master
1207

1208
    Any errors are signalled by raising errors.OpPrereqError.
1209

1210
    """
1211
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1212
    if node is None:
1213
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1214

    
1215
    instance_list = self.cfg.GetInstanceList()
1216

    
1217
    masternode = self.sstore.GetMasterNode()
1218
    if node.name == masternode:
1219
      raise errors.OpPrereqError("Node is the master node,"
1220
                                 " you need to failover first.")
1221

    
1222
    for instance_name in instance_list:
1223
      instance = self.cfg.GetInstanceInfo(instance_name)
1224
      if node.name == instance.primary_node:
1225
        raise errors.OpPrereqError("Instance %s still running on the node,"
1226
                                   " please remove first." % instance_name)
1227
      if node.name in instance.secondary_nodes:
1228
        raise errors.OpPrereqError("Instance %s has node as a secondary,"
1229
                                   " please remove first." % instance_name)
1230
    self.op.node_name = node.name
1231
    self.node = node
1232

    
1233
  def Exec(self, feedback_fn):
1234
    """Removes the node from the cluster.
1235

1236
    """
1237
    node = self.node
1238
    logger.Info("stopping the node daemon and removing configs from node %s" %
1239
                node.name)
1240

    
1241
    rpc.call_node_leave_cluster(node.name)
1242

    
1243
    logger.Info("Removing node %s from config" % node.name)
1244

    
1245
    self.cfg.RemoveNode(node.name)
1246

    
1247
    utils.RemoveHostFromEtcHosts(node.name)
1248

    
1249

    
1250
class LUQueryNodes(NoHooksLU):
1251
  """Logical unit for querying nodes.
1252

1253
  """
1254
  _OP_REQP = ["output_fields", "names"]
1255

    
1256
  def CheckPrereq(self):
1257
    """Check prerequisites.
1258

1259
    This checks that the fields required are valid output fields.
1260

1261
    """
1262
    self.dynamic_fields = frozenset([
1263
      "dtotal", "dfree",
1264
      "mtotal", "mnode", "mfree",
1265
      "bootid",
1266
      "ctotal",
1267
      ])
1268

    
1269
    _CheckOutputFields(static=["name", "pinst_cnt", "sinst_cnt",
1270
                               "pinst_list", "sinst_list",
1271
                               "pip", "sip", "tags"],
1272
                       dynamic=self.dynamic_fields,
1273
                       selected=self.op.output_fields)
1274

    
1275
    self.wanted = _GetWantedNodes(self, self.op.names)
1276

    
1277
  def Exec(self, feedback_fn):
1278
    """Computes the list of nodes and their attributes.
1279

1280
    """
1281
    nodenames = self.wanted
1282
    nodelist = [self.cfg.GetNodeInfo(name) for name in nodenames]
1283

    
1284
    # begin data gathering
1285

    
1286
    if self.dynamic_fields.intersection(self.op.output_fields):
1287
      live_data = {}
1288
      node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
1289
      for name in nodenames:
1290
        nodeinfo = node_data.get(name, None)
1291
        if nodeinfo:
1292
          live_data[name] = {
1293
            "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
1294
            "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
1295
            "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
1296
            "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
1297
            "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
1298
            "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
1299
            "bootid": nodeinfo['bootid'],
1300
            }
1301
        else:
1302
          live_data[name] = {}
1303
    else:
1304
      live_data = dict.fromkeys(nodenames, {})
1305

    
1306
    node_to_primary = dict([(name, set()) for name in nodenames])
1307
    node_to_secondary = dict([(name, set()) for name in nodenames])
1308

    
1309
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
1310
                             "sinst_cnt", "sinst_list"))
1311
    if inst_fields & frozenset(self.op.output_fields):
1312
      instancelist = self.cfg.GetInstanceList()
1313

    
1314
      for instance_name in instancelist:
1315
        inst = self.cfg.GetInstanceInfo(instance_name)
1316
        if inst.primary_node in node_to_primary:
1317
          node_to_primary[inst.primary_node].add(inst.name)
1318
        for secnode in inst.secondary_nodes:
1319
          if secnode in node_to_secondary:
1320
            node_to_secondary[secnode].add(inst.name)
1321

    
1322
    # end data gathering
1323

    
1324
    output = []
1325
    for node in nodelist:
1326
      node_output = []
1327
      for field in self.op.output_fields:
1328
        if field == "name":
1329
          val = node.name
1330
        elif field == "pinst_list":
1331
          val = list(node_to_primary[node.name])
1332
        elif field == "sinst_list":
1333
          val = list(node_to_secondary[node.name])
1334
        elif field == "pinst_cnt":
1335
          val = len(node_to_primary[node.name])
1336
        elif field == "sinst_cnt":
1337
          val = len(node_to_secondary[node.name])
1338
        elif field == "pip":
1339
          val = node.primary_ip
1340
        elif field == "sip":
1341
          val = node.secondary_ip
1342
        elif field == "tags":
1343
          val = list(node.GetTags())
1344
        elif field in self.dynamic_fields:
1345
          val = live_data[node.name].get(field, None)
1346
        else:
1347
          raise errors.ParameterError(field)
1348
        node_output.append(val)
1349
      output.append(node_output)
1350

    
1351
    return output
1352

    
1353

    
1354
class LUQueryNodeVolumes(NoHooksLU):
1355
  """Logical unit for getting volumes on node(s).
1356

1357
  """
1358
  _OP_REQP = ["nodes", "output_fields"]
1359

    
1360
  def CheckPrereq(self):
1361
    """Check prerequisites.
1362

1363
    This checks that the fields required are valid output fields.
1364

1365
    """
1366
    self.nodes = _GetWantedNodes(self, self.op.nodes)
1367

    
1368
    _CheckOutputFields(static=["node"],
1369
                       dynamic=["phys", "vg", "name", "size", "instance"],
1370
                       selected=self.op.output_fields)
1371

    
1372

    
1373
  def Exec(self, feedback_fn):
1374
    """Computes the list of nodes and their attributes.
1375

1376
    """
1377
    nodenames = self.nodes
1378
    volumes = rpc.call_node_volumes(nodenames)
1379

    
1380
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
1381
             in self.cfg.GetInstanceList()]
1382

    
1383
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
1384

    
1385
    output = []
1386
    for node in nodenames:
1387
      if node not in volumes or not volumes[node]:
1388
        continue
1389

    
1390
      node_vols = volumes[node][:]
1391
      node_vols.sort(key=lambda vol: vol['dev'])
1392

    
1393
      for vol in node_vols:
1394
        node_output = []
1395
        for field in self.op.output_fields:
1396
          if field == "node":
1397
            val = node
1398
          elif field == "phys":
1399
            val = vol['dev']
1400
          elif field == "vg":
1401
            val = vol['vg']
1402
          elif field == "name":
1403
            val = vol['name']
1404
          elif field == "size":
1405
            val = int(float(vol['size']))
1406
          elif field == "instance":
1407
            for inst in ilist:
1408
              if node not in lv_by_node[inst]:
1409
                continue
1410
              if vol['name'] in lv_by_node[inst][node]:
1411
                val = inst.name
1412
                break
1413
            else:
1414
              val = '-'
1415
          else:
1416
            raise errors.ParameterError(field)
1417
          node_output.append(str(val))
1418

    
1419
        output.append(node_output)
1420

    
1421
    return output
1422

    
1423

    
1424
class LUAddNode(LogicalUnit):
1425
  """Logical unit for adding node to the cluster.
1426

1427
  """
1428
  HPATH = "node-add"
1429
  HTYPE = constants.HTYPE_NODE
1430
  _OP_REQP = ["node_name"]
1431

    
1432
  def BuildHooksEnv(self):
1433
    """Build hooks env.
1434

1435
    This will run on all nodes before, and on all nodes + the new node after.
1436

1437
    """
1438
    env = {
1439
      "OP_TARGET": self.op.node_name,
1440
      "NODE_NAME": self.op.node_name,
1441
      "NODE_PIP": self.op.primary_ip,
1442
      "NODE_SIP": self.op.secondary_ip,
1443
      }
1444
    nodes_0 = self.cfg.GetNodeList()
1445
    nodes_1 = nodes_0 + [self.op.node_name, ]
1446
    return env, nodes_0, nodes_1
1447

    
1448
  def CheckPrereq(self):
1449
    """Check prerequisites.
1450

1451
    This checks:
1452
     - the new node is not already in the config
1453
     - it is resolvable
1454
     - its parameters (single/dual homed) match the cluster
1455

1456
    Any errors are signalled by raising errors.OpPrereqError.
1457

1458
    """
1459
    node_name = self.op.node_name
1460
    cfg = self.cfg
1461

    
1462
    dns_data = utils.HostInfo(node_name)
1463

    
1464
    node = dns_data.name
1465
    primary_ip = self.op.primary_ip = dns_data.ip
1466
    secondary_ip = getattr(self.op, "secondary_ip", None)
1467
    if secondary_ip is None:
1468
      secondary_ip = primary_ip
1469
    if not utils.IsValidIP(secondary_ip):
1470
      raise errors.OpPrereqError("Invalid secondary IP given")
1471
    self.op.secondary_ip = secondary_ip
1472

    
1473
    node_list = cfg.GetNodeList()
1474
    if not self.op.readd and node in node_list:
1475
      raise errors.OpPrereqError("Node %s is already in the configuration" %
1476
                                 node)
1477
    elif self.op.readd and node not in node_list:
1478
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
1479

    
1480
    for existing_node_name in node_list:
1481
      existing_node = cfg.GetNodeInfo(existing_node_name)
1482

    
1483
      if self.op.readd and node == existing_node_name:
1484
        if (existing_node.primary_ip != primary_ip or
1485
            existing_node.secondary_ip != secondary_ip):
1486
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
1487
                                     " address configuration as before")
1488
        continue
1489

    
1490
      if (existing_node.primary_ip == primary_ip or
1491
          existing_node.secondary_ip == primary_ip or
1492
          existing_node.primary_ip == secondary_ip or
1493
          existing_node.secondary_ip == secondary_ip):
1494
        raise errors.OpPrereqError("New node ip address(es) conflict with"
1495
                                   " existing node %s" % existing_node.name)
1496

    
1497
    # check that the type of the node (single versus dual homed) is the
1498
    # same as for the master
1499
    myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
1500
    master_singlehomed = myself.secondary_ip == myself.primary_ip
1501
    newbie_singlehomed = secondary_ip == primary_ip
1502
    if master_singlehomed != newbie_singlehomed:
1503
      if master_singlehomed:
1504
        raise errors.OpPrereqError("The master has no private ip but the"
1505
                                   " new node has one")
1506
      else:
1507
        raise errors.OpPrereqError("The master has a private ip but the"
1508
                                   " new node doesn't have one")
1509

    
1510
    # checks reachability
1511
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
1512
      raise errors.OpPrereqError("Node not reachable by ping")
1513

    
1514
    if not newbie_singlehomed:
1515
      # check reachability from my secondary ip to newbie's secondary ip
1516
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
1517
                           source=myself.secondary_ip):
1518
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
1519
                                   " based ping to noded port")
1520

    
1521
    self.new_node = objects.Node(name=node,
1522
                                 primary_ip=primary_ip,
1523
                                 secondary_ip=secondary_ip)
1524

    
1525
  def Exec(self, feedback_fn):
1526
    """Adds the new node to the cluster.
1527

1528
    """
1529
    new_node = self.new_node
1530
    node = new_node.name
1531

    
1532
    # set up inter-node password and certificate and restarts the node daemon
1533
    gntpass = self.sstore.GetNodeDaemonPassword()
1534
    if not re.match('^[a-zA-Z0-9.]{1,64}$', gntpass):
1535
      raise errors.OpExecError("ganeti password corruption detected")
1536
    f = open(constants.SSL_CERT_FILE)
1537
    try:
1538
      gntpem = f.read(8192)
1539
    finally:
1540
      f.close()
1541
    # in the base64 pem encoding, neither '!' nor '.' are valid chars,
1542
    # so we use this to detect an invalid certificate; as long as the
1543
    # cert doesn't contain this, the here-document will be correctly
1544
    # parsed by the shell sequence below
1545
    if re.search('^!EOF\.', gntpem, re.MULTILINE):
1546
      raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
1547
    if not gntpem.endswith("\n"):
1548
      raise errors.OpExecError("PEM must end with newline")
1549
    logger.Info("copy cluster pass to %s and starting the node daemon" % node)
1550

    
1551
    # and then connect with ssh to set password and start ganeti-noded
1552
    # note that all the below variables are sanitized at this point,
1553
    # either by being constants or by the checks above
1554
    ss = self.sstore
1555
    mycommand = ("umask 077 && "
1556
                 "echo '%s' > '%s' && "
1557
                 "cat > '%s' << '!EOF.' && \n"
1558
                 "%s!EOF.\n%s restart" %
1559
                 (gntpass, ss.KeyToFilename(ss.SS_NODED_PASS),
1560
                  constants.SSL_CERT_FILE, gntpem,
1561
                  constants.NODE_INITD_SCRIPT))
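    # Illustrative sketch only (the values shown are placeholders, not real
    # secrets): after substitution the remote command looks roughly like
    #   umask 077 && echo '<password>' > '<noded password file>' && \
    #   cat > '<ssl cert file>' << '!EOF.' &&
    #   <PEM data>!EOF.
    #   <node init script> restart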
1562

    
1563
    result = self.ssh.Run(node, 'root', mycommand, batch=False, ask_key=True)
1564
    if result.failed:
1565
      raise errors.OpExecError("Remote command on node %s, error: %s,"
1566
                               " output: %s" %
1567
                               (node, result.fail_reason, result.output))
1568

    
1569
    # check connectivity
1570
    time.sleep(4)
1571

    
1572
    result = rpc.call_version([node])[node]
1573
    if result:
1574
      if constants.PROTOCOL_VERSION == result:
1575
        logger.Info("communication to node %s fine, sw version %s match" %
1576
                    (node, result))
1577
      else:
1578
        raise errors.OpExecError("Version mismatch master version %s,"
1579
                                 " node version %s" %
1580
                                 (constants.PROTOCOL_VERSION, result))
1581
    else:
1582
      raise errors.OpExecError("Cannot get version from the new node")
1583

    
1584
    # setup ssh on node
1585
    logger.Info("copy ssh key to node %s" % node)
1586
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1587
    keyarray = []
1588
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
1589
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
1590
                priv_key, pub_key]
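    # note: the six files are read in this exact order because their
    # contents are passed positionally to rpc.call_node_add() below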
1591

    
1592
    for i in keyfiles:
1593
      f = open(i, 'r')
1594
      try:
1595
        keyarray.append(f.read())
1596
      finally:
1597
        f.close()
1598

    
1599
    result = rpc.call_node_add(node, keyarray[0], keyarray[1], keyarray[2],
1600
                               keyarray[3], keyarray[4], keyarray[5])
1601

    
1602
    if not result:
1603
      raise errors.OpExecError("Cannot transfer ssh keys to the new node")
1604

    
1605
    # Add node to our /etc/hosts, and add key to known_hosts
1606
    utils.AddHostToEtcHosts(new_node.name)
1607

    
1608
    if new_node.secondary_ip != new_node.primary_ip:
1609
      if not rpc.call_node_tcp_ping(new_node.name,
1610
                                    constants.LOCALHOST_IP_ADDRESS,
1611
                                    new_node.secondary_ip,
1612
                                    constants.DEFAULT_NODED_PORT,
1613
                                    10, False):
1614
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
1615
                                 " you gave (%s). Please fix and re-run this"
1616
                                 " command." % new_node.secondary_ip)
1617

    
1618
    node_verify_list = [self.sstore.GetMasterNode()]
1619
    node_verify_param = {
1620
      'nodelist': [node],
1621
      # TODO: do a node-net-test as well?
1622
    }
1623

    
1624
    result = rpc.call_node_verify(node_verify_list, node_verify_param)
1625
    for verifier in node_verify_list:
1626
      if not result[verifier]:
1627
        raise errors.OpExecError("Cannot communicate with %s's node daemon"
1628
                                 " for remote verification" % verifier)
1629
      if result[verifier]['nodelist']:
1630
        for failed in result[verifier]['nodelist']:
1631
          feedback_fn("ssh/hostname verification failed %s -> %s" %
1632
                      (verifier, result[verifier]['nodelist'][failed]))
1633
        raise errors.OpExecError("ssh/hostname verification failed.")
1634

    
1635
    # Distribute updated /etc/hosts and known_hosts to all nodes,
1636
    # including the node just added
1637
    myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
1638
    dist_nodes = self.cfg.GetNodeList()
1639
    if not self.op.readd:
1640
      dist_nodes.append(node)
1641
    if myself.name in dist_nodes:
1642
      dist_nodes.remove(myself.name)
1643

    
1644
    logger.Debug("Copying hosts and known_hosts to all nodes")
1645
    for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
1646
      result = rpc.call_upload_file(dist_nodes, fname)
1647
      for to_node in dist_nodes:
1648
        if not result[to_node]:
1649
          logger.Error("copy of file %s to node %s failed" %
1650
                       (fname, to_node))
1651

    
1652
    to_copy = self.sstore.GetFileList()
1653
    if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
1654
      to_copy.append(constants.VNC_PASSWORD_FILE)
1655
    for fname in to_copy:
1656
      result = rpc.call_upload_file([node], fname)
1657
      if not result[node]:
1658
        logger.Error("could not copy file %s to node %s" % (fname, node))
1659

    
1660
    if not self.op.readd:
1661
      logger.Info("adding node %s to cluster.conf" % node)
1662
      self.cfg.AddNode(new_node)
1663

    
1664

    
1665
class LUMasterFailover(LogicalUnit):
1666
  """Failover the master node to the current node.
1667

1668
  This is a special LU in that it must run on a non-master node.
1669

1670
  """
1671
  HPATH = "master-failover"
1672
  HTYPE = constants.HTYPE_CLUSTER
1673
  REQ_MASTER = False
1674
  REQ_WSSTORE = True
1675
  _OP_REQP = []
1676

    
1677
  def BuildHooksEnv(self):
1678
    """Build hooks env.
1679

1680
    This will run on the new master only in the pre phase, and on all
1681
    the nodes in the post phase.
1682

1683
    """
1684
    env = {
1685
      "OP_TARGET": self.new_master,
1686
      "NEW_MASTER": self.new_master,
1687
      "OLD_MASTER": self.old_master,
1688
      }
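    # as described in the docstring above, the tuple returned below is
    # (environment, nodes running the pre-phase hooks, nodes for the post phase)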
1689
    return env, [self.new_master], self.cfg.GetNodeList()
1690

    
1691
  def CheckPrereq(self):
1692
    """Check prerequisites.
1693

1694
    This checks that we are not already the master.
1695

1696
    """
1697
    self.new_master = utils.HostInfo().name
1698
    self.old_master = self.sstore.GetMasterNode()
1699

    
1700
    if self.old_master == self.new_master:
1701
      raise errors.OpPrereqError("This commands must be run on the node"
1702
                                 " where you want the new master to be."
1703
                                 " %s is already the master" %
1704
                                 self.old_master)
1705

    
1706
  def Exec(self, feedback_fn):
1707
    """Failover the master node.
1708

1709
    This command, when run on a non-master node, will cause the current
1710
    master to cease being master, and the non-master to become new
1711
    master.
1712

1713
    """
1714
    #TODO: do not rely on gethostname returning the FQDN
1715
    logger.Info("setting master to %s, old master: %s" %
1716
                (self.new_master, self.old_master))
1717

    
1718
    if not rpc.call_node_stop_master(self.old_master):
1719
      logger.Error("could disable the master role on the old master"
1720
                   " %s, please disable manually" % self.old_master)
1721

    
1722
    ss = self.sstore
1723
    ss.SetKey(ss.SS_MASTER_NODE, self.new_master)
1724
    if not rpc.call_upload_file(self.cfg.GetNodeList(),
1725
                                ss.KeyToFilename(ss.SS_MASTER_NODE)):
1726
      logger.Error("could not distribute the new simple store master file"
1727
                   " to the other nodes, please check.")
1728

    
1729
    if not rpc.call_node_start_master(self.new_master):
1730
      logger.Error("could not start the master role on the new master"
1731
                   " %s, please check" % self.new_master)
1732
      feedback_fn("Error in activating the master IP on the new master,"
1733
                  " please fix manually.")
1734

    
1735

    
1736

    
1737
class LUQueryClusterInfo(NoHooksLU):
1738
  """Query cluster configuration.
1739

1740
  """
1741
  _OP_REQP = []
1742
  REQ_MASTER = False
1743

    
1744
  def CheckPrereq(self):
1745
    """No prerequsites needed for this LU.
1746

1747
    """
1748
    pass
1749

    
1750
  def Exec(self, feedback_fn):
1751
    """Return cluster config.
1752

1753
    """
1754
    result = {
1755
      "name": self.sstore.GetClusterName(),
1756
      "software_version": constants.RELEASE_VERSION,
1757
      "protocol_version": constants.PROTOCOL_VERSION,
1758
      "config_version": constants.CONFIG_VERSION,
1759
      "os_api_version": constants.OS_API_VERSION,
1760
      "export_version": constants.EXPORT_VERSION,
1761
      "master": self.sstore.GetMasterNode(),
1762
      "architecture": (platform.architecture()[0], platform.machine()),
1763
      "hypervisor_type": self.sstore.GetHypervisorType(),
1764
      }
1765

    
1766
    return result
1767

    
1768

    
1769
class LUDumpClusterConfig(NoHooksLU):
1770
  """Return a text-representation of the cluster-config.
1771

1772
  """
1773
  _OP_REQP = []
1774

    
1775
  def CheckPrereq(self):
1776
    """No prerequisites.
1777

1778
    """
1779
    pass
1780

    
1781
  def Exec(self, feedback_fn):
1782
    """Dump a representation of the cluster config to the standard output.
1783

1784
    """
1785
    return self.cfg.DumpConfig()
1786

    
1787

    
1788
class LUActivateInstanceDisks(NoHooksLU):
1789
  """Bring up an instance's disks.
1790

1791
  """
1792
  _OP_REQP = ["instance_name"]
1793

    
1794
  def CheckPrereq(self):
1795
    """Check prerequisites.
1796

1797
    This checks that the instance is in the cluster.
1798

1799
    """
1800
    instance = self.cfg.GetInstanceInfo(
1801
      self.cfg.ExpandInstanceName(self.op.instance_name))
1802
    if instance is None:
1803
      raise errors.OpPrereqError("Instance '%s' not known" %
1804
                                 self.op.instance_name)
1805
    self.instance = instance
1806

    
1807

    
1808
  def Exec(self, feedback_fn):
1809
    """Activate the disks.
1810

1811
    """
1812
    disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
1813
    if not disks_ok:
1814
      raise errors.OpExecError("Cannot activate block devices")
1815

    
1816
    return disks_info
1817

    
1818

    
1819
def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
1820
  """Prepare the block devices for an instance.
1821

1822
  This sets up the block devices on all nodes.
1823

1824
  Args:
1825
    instance: a ganeti.objects.Instance object
1826
    ignore_secondaries: if true, errors on secondary nodes won't result
1827
                        in an error return from the function
1828

1829
  Returns:
1830
    false if the operation failed
1831
    list of (host, instance_visible_name, node_visible_name) if the operation
1832
         succeeded with the mapping from node devices to instance devices
1833
  """
1834
  device_info = []
1835
  disks_ok = True
1836
  iname = instance.name
1837
  # With the two passes mechanism we try to reduce the window of
1838
  # opportunity for the race condition of switching DRBD to primary
1839
  # before handshaking occurred, but we do not eliminate it
1840

    
1841
  # The proper fix would be to wait (with some limits) until the
1842
  # connection has been made and drbd transitions from WFConnection
1843
  # into any other network-connected state (Connected, SyncTarget,
1844
  # SyncSource, etc.)
1845

    
1846
  # 1st pass, assemble on all nodes in secondary mode
1847
  for inst_disk in instance.disks:
1848
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1849
      cfg.SetDiskID(node_disk, node)
1850
      result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
1851
      if not result:
1852
        logger.Error("could not prepare block device %s on node %s"
1853
                     " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
1854
        if not ignore_secondaries:
1855
          disks_ok = False
1856

    
1857
  # FIXME: race condition on drbd migration to primary
1858

    
1859
  # 2nd pass, do only the primary node
1860
  for inst_disk in instance.disks:
1861
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
1862
      if node != instance.primary_node:
1863
        continue
1864
      cfg.SetDiskID(node_disk, node)
1865
      result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
1866
      if not result:
1867
        logger.Error("could not prepare block device %s on node %s"
1868
                     " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
1869
        disks_ok = False
1870
    device_info.append((instance.primary_node, inst_disk.iv_name, result))
1871

    
1872
  # leave the disks configured for the primary node
1873
  # this is a workaround that would be fixed better by
1874
  # improving the logical/physical id handling
1875
  for disk in instance.disks:
1876
    cfg.SetDiskID(disk, instance.primary_node)
1877

    
1878
  return disks_ok, device_info
1879

    
1880

    
1881
def _StartInstanceDisks(cfg, instance, force):
1882
  """Start the disks of an instance.
1883

1884
  """
1885
  disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
1886
                                           ignore_secondaries=force)
1887
  if not disks_ok:
1888
    _ShutdownInstanceDisks(instance, cfg)
1889
    if force is not None and not force:
1890
      logger.Error("If the message above refers to a secondary node,"
1891
                   " you can retry the operation using '--force'.")
1892
    raise errors.OpExecError("Disk consistency error")
1893

    
1894

    
1895
class LUDeactivateInstanceDisks(NoHooksLU):
1896
  """Shutdown an instance's disks.
1897

1898
  """
1899
  _OP_REQP = ["instance_name"]
1900

    
1901
  def CheckPrereq(self):
1902
    """Check prerequisites.
1903

1904
    This checks that the instance is in the cluster.
1905

1906
    """
1907
    instance = self.cfg.GetInstanceInfo(
1908
      self.cfg.ExpandInstanceName(self.op.instance_name))
1909
    if instance is None:
1910
      raise errors.OpPrereqError("Instance '%s' not known" %
1911
                                 self.op.instance_name)
1912
    self.instance = instance
1913

    
1914
  def Exec(self, feedback_fn):
1915
    """Deactivate the disks
1916

1917
    """
1918
    instance = self.instance
1919
    ins_l = rpc.call_instance_list([instance.primary_node])
1920
    ins_l = ins_l[instance.primary_node]
1921
    if not type(ins_l) is list:
1922
      raise errors.OpExecError("Can't contact node '%s'" %
1923
                               instance.primary_node)
1924

    
1925
    if self.instance.name in ins_l:
1926
      raise errors.OpExecError("Instance is running, can't shutdown"
1927
                               " block devices.")
1928

    
1929
    _ShutdownInstanceDisks(instance, self.cfg)
1930

    
1931

    
1932
def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
1933
  """Shutdown block devices of an instance.
1934

1935
  This does the shutdown on all nodes of the instance.
1936

1937
  If ignore_primary is false, errors on the primary node are not
1938
  ignored (they cause the shutdown to be reported as failed).
1939

1940
  """
1941
  result = True
1942
  for disk in instance.disks:
1943
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
1944
      cfg.SetDiskID(top_disk, node)
1945
      if not rpc.call_blockdev_shutdown(node, top_disk):
1946
        logger.Error("could not shutdown block device %s on node %s" %
1947
                     (disk.iv_name, node))
1948
        if not ignore_primary or node != instance.primary_node:
1949
          result = False
1950
  return result
1951

    
1952

    
1953
def _CheckNodeFreeMemory(cfg, node, reason, requested):
1954
  """Checks if a node has enough free memory.
1955

1956
  This function checks if a given node has the needed amount of free
1957
  memory. In case the node has less memory or we cannot get the
1958
  information from the node, this function raises an OpPrereqError
1959
  exception.
1960

1961
  Args:
1962
    - cfg: a ConfigWriter instance
1963
    - node: the node name
1964
    - reason: string to use in the error message
1965
    - requested: the amount of memory in MiB
1966

1967
  """
1968
  nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
1969
  if not nodeinfo or not isinstance(nodeinfo, dict):
1970
    raise errors.OpPrereqError("Could not contact node %s for resource"
1971
                             " information" % (node,))
1972

    
1973
  free_mem = nodeinfo[node].get('memory_free')
1974
  if not isinstance(free_mem, int):
1975
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
1976
                             " was '%s'" % (node, free_mem))
1977
  if requested > free_mem:
1978
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
1979
                             " needed %s MiB, available %s MiB" %
1980
                             (node, reason, requested, free_mem))
1981

    
1982

    
1983
class LUStartupInstance(LogicalUnit):
1984
  """Starts an instance.
1985

1986
  """
1987
  HPATH = "instance-start"
1988
  HTYPE = constants.HTYPE_INSTANCE
1989
  _OP_REQP = ["instance_name", "force"]
1990

    
1991
  def BuildHooksEnv(self):
1992
    """Build hooks env.
1993

1994
    This runs on master, primary and secondary nodes of the instance.
1995

1996
    """
1997
    env = {
1998
      "FORCE": self.op.force,
1999
      }
2000
    env.update(_BuildInstanceHookEnvByObject(self.instance))
2001
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2002
          list(self.instance.secondary_nodes))
2003
    return env, nl, nl
2004

    
2005
  def CheckPrereq(self):
2006
    """Check prerequisites.
2007

2008
    This checks that the instance is in the cluster.
2009

2010
    """
2011
    instance = self.cfg.GetInstanceInfo(
2012
      self.cfg.ExpandInstanceName(self.op.instance_name))
2013
    if instance is None:
2014
      raise errors.OpPrereqError("Instance '%s' not known" %
2015
                                 self.op.instance_name)
2016

    
2017
    # check bridges existence
2018
    _CheckInstanceBridgesExist(instance)
2019

    
2020
    _CheckNodeFreeMemory(self.cfg, instance.primary_node,
2021
                         "starting instance %s" % instance.name,
2022
                         instance.memory)
2023

    
2024
    self.instance = instance
2025
    self.op.instance_name = instance.name
2026

    
2027
  def Exec(self, feedback_fn):
2028
    """Start the instance.
2029

2030
    """
2031
    instance = self.instance
2032
    force = self.op.force
2033
    extra_args = getattr(self.op, "extra_args", "")
2034

    
2035
    self.cfg.MarkInstanceUp(instance.name)
2036

    
2037
    node_current = instance.primary_node
2038

    
2039
    _StartInstanceDisks(self.cfg, instance, force)
2040

    
2041
    if not rpc.call_instance_start(node_current, instance, extra_args):
2042
      _ShutdownInstanceDisks(instance, self.cfg)
2043
      raise errors.OpExecError("Could not start instance")
2044

    
2045

    
2046
class LURebootInstance(LogicalUnit):
2047
  """Reboot an instance.
2048

2049
  """
2050
  HPATH = "instance-reboot"
2051
  HTYPE = constants.HTYPE_INSTANCE
2052
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
2053

    
2054
  def BuildHooksEnv(self):
2055
    """Build hooks env.
2056

2057
    This runs on master, primary and secondary nodes of the instance.
2058

2059
    """
2060
    env = {
2061
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
2062
      }
2063
    env.update(_BuildInstanceHookEnvByObject(self.instance))
2064
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2065
          list(self.instance.secondary_nodes))
2066
    return env, nl, nl
2067

    
2068
  def CheckPrereq(self):
2069
    """Check prerequisites.
2070

2071
    This checks that the instance is in the cluster.
2072

2073
    """
2074
    instance = self.cfg.GetInstanceInfo(
2075
      self.cfg.ExpandInstanceName(self.op.instance_name))
2076
    if instance is None:
2077
      raise errors.OpPrereqError("Instance '%s' not known" %
2078
                                 self.op.instance_name)
2079

    
2080
    # check bridges existence
2081
    _CheckInstanceBridgesExist(instance)
2082

    
2083
    self.instance = instance
2084
    self.op.instance_name = instance.name
2085

    
2086
  def Exec(self, feedback_fn):
2087
    """Reboot the instance.
2088

2089
    """
2090
    instance = self.instance
2091
    ignore_secondaries = self.op.ignore_secondaries
2092
    reboot_type = self.op.reboot_type
2093
    extra_args = getattr(self.op, "extra_args", "")
2094

    
2095
    node_current = instance.primary_node
2096

    
2097
    if reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
2098
                           constants.INSTANCE_REBOOT_HARD,
2099
                           constants.INSTANCE_REBOOT_FULL]:
2100
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
2101
                                  (constants.INSTANCE_REBOOT_SOFT,
2102
                                   constants.INSTANCE_REBOOT_HARD,
2103
                                   constants.INSTANCE_REBOOT_FULL))
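    # soft and hard reboots are delegated to the node daemon via a single RPC
    # call; a full reboot is emulated below as shutdown, disk
    # deactivation/reactivation and a fresh start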
2104

    
2105
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
2106
                       constants.INSTANCE_REBOOT_HARD]:
2107
      if not rpc.call_instance_reboot(node_current, instance,
2108
                                      reboot_type, extra_args):
2109
        raise errors.OpExecError("Could not reboot instance")
2110
    else:
2111
      if not rpc.call_instance_shutdown(node_current, instance):
2112
        raise errors.OpExecError("could not shutdown instance for full reboot")
2113
      _ShutdownInstanceDisks(instance, self.cfg)
2114
      _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
2115
      if not rpc.call_instance_start(node_current, instance, extra_args):
2116
        _ShutdownInstanceDisks(instance, self.cfg)
2117
        raise errors.OpExecError("Could not start instance for full reboot")
2118

    
2119
    self.cfg.MarkInstanceUp(instance.name)
2120

    
2121

    
2122
class LUShutdownInstance(LogicalUnit):
2123
  """Shutdown an instance.
2124

2125
  """
2126
  HPATH = "instance-stop"
2127
  HTYPE = constants.HTYPE_INSTANCE
2128
  _OP_REQP = ["instance_name"]
2129

    
2130
  def BuildHooksEnv(self):
2131
    """Build hooks env.
2132

2133
    This runs on master, primary and secondary nodes of the instance.
2134

2135
    """
2136
    env = _BuildInstanceHookEnvByObject(self.instance)
2137
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2138
          list(self.instance.secondary_nodes))
2139
    return env, nl, nl
2140

    
2141
  def CheckPrereq(self):
2142
    """Check prerequisites.
2143

2144
    This checks that the instance is in the cluster.
2145

2146
    """
2147
    instance = self.cfg.GetInstanceInfo(
2148
      self.cfg.ExpandInstanceName(self.op.instance_name))
2149
    if instance is None:
2150
      raise errors.OpPrereqError("Instance '%s' not known" %
2151
                                 self.op.instance_name)
2152
    self.instance = instance
2153

    
2154
  def Exec(self, feedback_fn):
2155
    """Shutdown the instance.
2156

2157
    """
2158
    instance = self.instance
2159
    node_current = instance.primary_node
2160
    self.cfg.MarkInstanceDown(instance.name)
2161
    if not rpc.call_instance_shutdown(node_current, instance):
2162
      logger.Error("could not shutdown instance")
2163

    
2164
    _ShutdownInstanceDisks(instance, self.cfg)
2165

    
2166

    
2167
class LUReinstallInstance(LogicalUnit):
2168
  """Reinstall an instance.
2169

2170
  """
2171
  HPATH = "instance-reinstall"
2172
  HTYPE = constants.HTYPE_INSTANCE
2173
  _OP_REQP = ["instance_name"]
2174

    
2175
  def BuildHooksEnv(self):
2176
    """Build hooks env.
2177

2178
    This runs on master, primary and secondary nodes of the instance.
2179

2180
    """
2181
    env = _BuildInstanceHookEnvByObject(self.instance)
2182
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2183
          list(self.instance.secondary_nodes))
2184
    return env, nl, nl
2185

    
2186
  def CheckPrereq(self):
2187
    """Check prerequisites.
2188

2189
    This checks that the instance is in the cluster and is not running.
2190

2191
    """
2192
    instance = self.cfg.GetInstanceInfo(
2193
      self.cfg.ExpandInstanceName(self.op.instance_name))
2194
    if instance is None:
2195
      raise errors.OpPrereqError("Instance '%s' not known" %
2196
                                 self.op.instance_name)
2197
    if instance.disk_template == constants.DT_DISKLESS:
2198
      raise errors.OpPrereqError("Instance '%s' has no disks" %
2199
                                 self.op.instance_name)
2200
    if instance.status != "down":
2201
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2202
                                 self.op.instance_name)
2203
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
2204
    if remote_info:
2205
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2206
                                 (self.op.instance_name,
2207
                                  instance.primary_node))
2208

    
2209
    self.op.os_type = getattr(self.op, "os_type", None)
2210
    if self.op.os_type is not None:
2211
      # OS verification
2212
      pnode = self.cfg.GetNodeInfo(
2213
        self.cfg.ExpandNodeName(instance.primary_node))
2214
      if pnode is None:
2215
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
2216
                                   self.op.pnode)
2217
      os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
2218
      if not os_obj:
2219
        raise errors.OpPrereqError("OS '%s' not in supported OS list for"
2220
                                   " primary node"  % self.op.os_type)
2221

    
2222
    self.instance = instance
2223

    
2224
  def Exec(self, feedback_fn):
2225
    """Reinstall the instance.
2226

2227
    """
2228
    inst = self.instance
2229

    
2230
    if self.op.os_type is not None:
2231
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
2232
      inst.os = self.op.os_type
2233
      self.cfg.AddInstance(inst)
2234

    
2235
    _StartInstanceDisks(self.cfg, inst, None)
2236
    try:
2237
      feedback_fn("Running the instance OS create scripts...")
2238
      if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
2239
        raise errors.OpExecError("Could not install OS for instance %s"
2240
                                 " on node %s" %
2241
                                 (inst.name, inst.primary_node))
2242
    finally:
2243
      _ShutdownInstanceDisks(inst, self.cfg)
2244

    
2245

    
2246
class LURenameInstance(LogicalUnit):
2247
  """Rename an instance.
2248

2249
  """
2250
  HPATH = "instance-rename"
2251
  HTYPE = constants.HTYPE_INSTANCE
2252
  _OP_REQP = ["instance_name", "new_name"]
2253

    
2254
  def BuildHooksEnv(self):
2255
    """Build hooks env.
2256

2257
    This runs on master, primary and secondary nodes of the instance.
2258

2259
    """
2260
    env = _BuildInstanceHookEnvByObject(self.instance)
2261
    env["INSTANCE_NEW_NAME"] = self.op.new_name
2262
    nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
2263
          list(self.instance.secondary_nodes))
2264
    return env, nl, nl
2265

    
2266
  def CheckPrereq(self):
2267
    """Check prerequisites.
2268

2269
    This checks that the instance is in the cluster and is not running.
2270

2271
    """
2272
    instance = self.cfg.GetInstanceInfo(
2273
      self.cfg.ExpandInstanceName(self.op.instance_name))
2274
    if instance is None:
2275
      raise errors.OpPrereqError("Instance '%s' not known" %
2276
                                 self.op.instance_name)
2277
    if instance.status != "down":
2278
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
2279
                                 self.op.instance_name)
2280
    remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
2281
    if remote_info:
2282
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
2283
                                 (self.op.instance_name,
2284
                                  instance.primary_node))
2285
    self.instance = instance
2286

    
2287
    # new name verification
2288
    name_info = utils.HostInfo(self.op.new_name)
2289

    
2290
    self.op.new_name = new_name = name_info.name
2291
    instance_list = self.cfg.GetInstanceList()
2292
    if new_name in instance_list:
2293
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
2294
                                 new_name)
2295

    
2296
    if not getattr(self.op, "ignore_ip", False):
2297
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
2298
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
2299
                                   (name_info.ip, new_name))
2300

    
2301

    
2302
  def Exec(self, feedback_fn):
2303
    """Reinstall the instance.
2304

2305
    """
2306
    inst = self.instance
2307
    old_name = inst.name
2308

    
2309
    if inst.disk_template == constants.DT_FILE:
2310
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
2311

    
2312
    self.cfg.RenameInstance(inst.name, self.op.new_name)
2313

    
2314
    # re-read the instance from the configuration after rename
2315
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
2316

    
2317
    if inst.disk_template == constants.DT_FILE:
2318
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
2319
      result = rpc.call_file_storage_dir_rename(inst.primary_node,
2320
                                                old_file_storage_dir,
2321
                                                new_file_storage_dir)
2322

    
2323
      if not result:
2324
        raise errors.OpExecError("Could not connect to node '%s' to rename"
2325
                                 " directory '%s' to '%s' (but the instance"
2326
                                 " has been renamed in Ganeti)" % (
2327
                                 inst.primary_node, old_file_storage_dir,
2328
                                 new_file_storage_dir))
2329

    
2330
      if not result[0]:
2331
        raise errors.OpExecError("Could not rename directory '%s' to '%s'"
2332
                                 " (but the instance has been renamed in"
2333
                                 " Ganeti)" % (old_file_storage_dir,
2334
                                               new_file_storage_dir))
2335

    
2336
    _StartInstanceDisks(self.cfg, inst, None)
2337
    try:
2338
      if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
2339
                                          "sda", "sdb"):
2340
        msg = ("Could run OS rename script for instance %s on node %s (but the"
2341
               " instance has been renamed in Ganeti)" %
2342
               (inst.name, inst.primary_node))
2343
        logger.Error(msg)
2344
    finally:
2345
      _ShutdownInstanceDisks(inst, self.cfg)
2346

    
2347

    
2348
class LURemoveInstance(LogicalUnit):
2349
  """Remove an instance.
2350

2351
  """
2352
  HPATH = "instance-remove"
2353
  HTYPE = constants.HTYPE_INSTANCE
2354
  _OP_REQP = ["instance_name", "ignore_failures"]
2355

    
2356
  def BuildHooksEnv(self):
2357
    """Build hooks env.
2358

2359
    This runs on master, primary and secondary nodes of the instance.
2360

2361
    """
2362
    env = _BuildInstanceHookEnvByObject(self.instance)
2363
    nl = [self.sstore.GetMasterNode()]
2364
    return env, nl, nl
2365

    
2366
  def CheckPrereq(self):
2367
    """Check prerequisites.
2368

2369
    This checks that the instance is in the cluster.
2370

2371
    """
2372
    instance = self.cfg.GetInstanceInfo(
2373
      self.cfg.ExpandInstanceName(self.op.instance_name))
2374
    if instance is None:
2375
      raise errors.OpPrereqError("Instance '%s' not known" %
2376
                                 self.op.instance_name)
2377
    self.instance = instance
2378

    
2379
  def Exec(self, feedback_fn):
2380
    """Remove the instance.
2381

2382
    """
2383
    instance = self.instance
2384
    logger.Info("shutting down instance %s on node %s" %
2385
                (instance.name, instance.primary_node))
2386

    
2387
    if not rpc.call_instance_shutdown(instance.primary_node, instance):
2388
      if self.op.ignore_failures:
2389
        feedback_fn("Warning: can't shutdown instance")
2390
      else:
2391
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
2392
                                 (instance.name, instance.primary_node))
2393

    
2394
    logger.Info("removing block devices for instance %s" % instance.name)
2395

    
2396
    if not _RemoveDisks(instance, self.cfg):
2397
      if self.op.ignore_failures:
2398
        feedback_fn("Warning: can't remove instance's disks")
2399
      else:
2400
        raise errors.OpExecError("Can't remove instance's disks")
2401

    
2402
    logger.Info("removing instance %s out of cluster config" % instance.name)
2403

    
2404
    self.cfg.RemoveInstance(instance.name)
2405

    
2406

    
2407
class LUQueryInstances(NoHooksLU):
2408
  """Logical unit for querying instances.
2409

2410
  """
2411
  _OP_REQP = ["output_fields", "names"]
2412

    
2413
  def CheckPrereq(self):
2414
    """Check prerequisites.
2415

2416
    This checks that the fields required are valid output fields.
2417

2418
    """
2419
    self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
2420
    _CheckOutputFields(static=["name", "os", "pnode", "snodes",
2421
                               "admin_state", "admin_ram",
2422
                               "disk_template", "ip", "mac", "bridge",
2423
                               "sda_size", "sdb_size", "vcpus", "tags"],
2424
                       dynamic=self.dynamic_fields,
2425
                       selected=self.op.output_fields)
2426

    
2427
    self.wanted = _GetWantedInstances(self, self.op.names)
2428

    
2429
  def Exec(self, feedback_fn):
2430
    """Computes the list of nodes and their attributes.
2431

2432
    """
2433
    instance_names = self.wanted
2434
    instance_list = [self.cfg.GetInstanceInfo(iname) for iname
2435
                     in instance_names]
2436

    
2437
    # begin data gathering
2438

    
2439
    nodes = frozenset([inst.primary_node for inst in instance_list])
2440

    
2441
    bad_nodes = []
2442
    if self.dynamic_fields.intersection(self.op.output_fields):
2443
      live_data = {}
2444
      node_data = rpc.call_all_instances_info(nodes)
2445
      for name in nodes:
2446
        result = node_data[name]
2447
        if result:
2448
          live_data.update(result)
2449
        elif result == False:
2450
          bad_nodes.append(name)
2451
        # else no instance is alive
2452
    else:
2453
      live_data = dict([(name, {}) for name in instance_names])
2454

    
2455
    # end data gathering
2456

    
2457
    output = []
2458
    for instance in instance_list:
2459
      iout = []
2460
      for field in self.op.output_fields:
2461
        if field == "name":
2462
          val = instance.name
2463
        elif field == "os":
2464
          val = instance.os
2465
        elif field == "pnode":
2466
          val = instance.primary_node
2467
        elif field == "snodes":
2468
          val = list(instance.secondary_nodes)
2469
        elif field == "admin_state":
2470
          val = (instance.status != "down")
2471
        elif field == "oper_state":
2472
          if instance.primary_node in bad_nodes:
2473
            val = None
2474
          else:
2475
            val = bool(live_data.get(instance.name))
2476
        elif field == "status":
2477
          if instance.primary_node in bad_nodes:
2478
            val = "ERROR_nodedown"
2479
          else:
2480
            running = bool(live_data.get(instance.name))
2481
            if running:
2482
              if instance.status != "down":
2483
                val = "running"
2484
              else:
2485
                val = "ERROR_up"
2486
            else:
2487
              if instance.status != "down":
2488
                val = "ERROR_down"
2489
              else:
2490
                val = "ADMIN_down"
2491
        elif field == "admin_ram":
2492
          val = instance.memory
2493
        elif field == "oper_ram":
2494
          if instance.primary_node in bad_nodes:
2495
            val = None
2496
          elif instance.name in live_data:
2497
            val = live_data[instance.name].get("memory", "?")
2498
          else:
2499
            val = "-"
2500
        elif field == "disk_template":
2501
          val = instance.disk_template
2502
        elif field == "ip":
2503
          val = instance.nics[0].ip
2504
        elif field == "bridge":
2505
          val = instance.nics[0].bridge
2506
        elif field == "mac":
2507
          val = instance.nics[0].mac
2508
        elif field == "sda_size" or field == "sdb_size":
2509
          disk = instance.FindDisk(field[:3])
2510
          if disk is None:
2511
            val = None
2512
          else:
2513
            val = disk.size
2514
        elif field == "vcpus":
2515
          val = instance.vcpus
2516
        elif field == "tags":
2517
          val = list(instance.GetTags())
2518
        else:
2519
          raise errors.ParameterError(field)
2520
        iout.append(val)
2521
      output.append(iout)
2522

    
2523
    return output
2524

    
2525

    
2526
class LUFailoverInstance(LogicalUnit):
2527
  """Failover an instance.
2528

2529
  """
2530
  HPATH = "instance-failover"
2531
  HTYPE = constants.HTYPE_INSTANCE
2532
  _OP_REQP = ["instance_name", "ignore_consistency"]
2533

    
2534
  def BuildHooksEnv(self):
2535
    """Build hooks env.
2536

2537
    This runs on master, primary and secondary nodes of the instance.
2538

2539
    """
2540
    env = {
2541
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
2542
      }
2543
    env.update(_BuildInstanceHookEnvByObject(self.instance))
2544
    nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
2545
    return env, nl, nl
2546

    
2547
  def CheckPrereq(self):
2548
    """Check prerequisites.
2549

2550
    This checks that the instance is in the cluster.
2551

2552
    """
2553
    instance = self.cfg.GetInstanceInfo(
2554
      self.cfg.ExpandInstanceName(self.op.instance_name))
2555
    if instance is None:
2556
      raise errors.OpPrereqError("Instance '%s' not known" %
2557
                                 self.op.instance_name)
2558

    
2559
    if instance.disk_template not in constants.DTS_NET_MIRROR:
2560
      raise errors.OpPrereqError("Instance's disk layout is not"
2561
                                 " network mirrored, cannot failover.")
2562

    
2563
    secondary_nodes = instance.secondary_nodes
2564
    if not secondary_nodes:
2565
      raise errors.ProgrammerError("no secondary node but using "
2566
                                   "a mirrored disk template")
2567

    
2568
    target_node = secondary_nodes[0]
2569
    # check memory requirements on the secondary node
2570
    _CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" %
2571
                         instance.name, instance.memory)
2572

    
2573
    # check bridge existence
2574
    brlist = [nic.bridge for nic in instance.nics]
2575
    if not rpc.call_bridges_exist(target_node, brlist):
2576
      raise errors.OpPrereqError("One or more target bridges %s does not"
2577
                                 " exist on destination node '%s'" %
2578
                                 (brlist, target_node))
2579

    
2580
    self.instance = instance
2581

    
2582
  def Exec(self, feedback_fn):
2583
    """Failover an instance.
2584

2585
    The failover is done by shutting it down on its present node and
2586
    starting it on the secondary.
2587

2588
    """
2589
    instance = self.instance
2590

    
2591
    source_node = instance.primary_node
2592
    target_node = instance.secondary_nodes[0]
2593

    
2594
    feedback_fn("* checking disk consistency between source and target")
2595
    for dev in instance.disks:
2596
      # for drbd, these are drbd over lvm
2597
      if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
2598
        if instance.status == "up" and not self.op.ignore_consistency:
2599
          raise errors.OpExecError("Disk %s is degraded on target node,"
2600
                                   " aborting failover." % dev.iv_name)
2601

    
2602
    feedback_fn("* shutting down instance on source node")
2603
    logger.Info("Shutting down instance %s on node %s" %
2604
                (instance.name, source_node))
2605

    
2606
    if not rpc.call_instance_shutdown(source_node, instance):
2607
      if self.op.ignore_consistency:
2608
        logger.Error("Could not shutdown instance %s on node %s. Proceeding"
2609
                     " anyway. Please make sure node %s is down"  %
2610
                     (instance.name, source_node, source_node))
2611
      else:
2612
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
2613
                                 (instance.name, source_node))
2614

    
2615
    feedback_fn("* deactivating the instance's disks on source node")
2616
    if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
2617
      raise errors.OpExecError("Can't shut down the instance's disks.")
2618

    
2619
    instance.primary_node = target_node
2620
    # distribute new instance config to the other nodes
2621
    self.cfg.Update(instance)
2622

    
2623
    # Only start the instance if it's marked as up
2624
    if instance.status == "up":
2625
      feedback_fn("* activating the instance's disks on target node")
2626
      logger.Info("Starting instance %s on node %s" %
2627
                  (instance.name, target_node))
2628

    
2629
      disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
2630
                                               ignore_secondaries=True)
2631
      if not disks_ok:
2632
        _ShutdownInstanceDisks(instance, self.cfg)
2633
        raise errors.OpExecError("Can't activate the instance's disks")
2634

    
2635
      feedback_fn("* starting the instance on the target node")
2636
      if not rpc.call_instance_start(target_node, instance, None):
2637
        _ShutdownInstanceDisks(instance, self.cfg)
2638
        raise errors.OpExecError("Could not start instance %s on node %s." %
2639
                                 (instance.name, target_node))
2640

    
2641

    
2642
def _CreateBlockDevOnPrimary(cfg, node, instance, device, info):
2643
  """Create a tree of block devices on the primary node.
2644

2645
  This always creates all devices.
2646

2647
  """
2648
  if device.children:
2649
    for child in device.children:
2650
      if not _CreateBlockDevOnPrimary(cfg, node, instance, child, info):
2651
        return False
2652

    
2653
  cfg.SetDiskID(device, node)
2654
  new_id = rpc.call_blockdev_create(node, device, device.size,
2655
                                    instance.name, True, info)
2656
  if not new_id:
2657
    return False
2658
  if device.physical_id is None:
2659
    device.physical_id = new_id
2660
  return True
2661

    
2662

    
2663
def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
2664
  """Create a tree of block devices on a secondary node.
2665

2666
  If this device type has to be created on secondaries, create it and
2667
  all its children.
2668

2669
  If not, just recurse to children keeping the same 'force' value.
2670

2671
  """
2672
  if device.CreateOnSecondary():
2673
    force = True
2674
  if device.children:
2675
    for child in device.children:
2676
      if not _CreateBlockDevOnSecondary(cfg, node, instance,
2677
                                        child, force, info):
2678
        return False
2679

    
2680
  if not force:
2681
    return True
2682
  cfg.SetDiskID(device, node)
2683
  new_id = rpc.call_blockdev_create(node, device, device.size,
2684
                                    instance.name, False, info)
2685
  if not new_id:
2686
    return False
2687
  if device.physical_id is None:
2688
    device.physical_id = new_id
2689
  return True
2690

    
2691

    
2692
def _GenerateUniqueNames(cfg, exts):
2693
  """Generate a suitable LV name.
2694

2695
  This will generate a logical volume name for the given instance.
2696

2697
  """
2698
  results = []
2699
  for val in exts:
2700
    new_id = cfg.GenerateUniqueID()
2701
    results.append("%s%s" % (new_id, val))
2702
  return results
2703

    
2704

    
2705
def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name):
2706
  """Generate a drbd8 device complete with its children.
2707

2708
  """
2709
  port = cfg.AllocatePort()
2710
  vgname = cfg.GetVGName()
2711
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
2712
                          logical_id=(vgname, names[0]))
2713
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
2714
                          logical_id=(vgname, names[1]))
2715
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
2716
                          logical_id = (primary, secondary, port),
2717
                          children = [dev_data, dev_meta],
2718
                          iv_name=iv_name)
2719
  return drbd_dev
2720

    
2721

    
2722
def _GenerateDiskTemplate(cfg, template_name,
2723
                          instance_name, primary_node,
2724
                          secondary_nodes, disk_sz, swap_sz,
2725
                          file_storage_dir, file_driver):
2726
  """Generate the entire disk layout for a given template type.
2727

2728
  """
2729
  #TODO: compute space requirements
2730

    
2731
  vgname = cfg.GetVGName()
2732
  if template_name == constants.DT_DISKLESS:
2733
    disks = []
2734
  elif template_name == constants.DT_PLAIN:
2735
    if len(secondary_nodes) != 0:
2736
      raise errors.ProgrammerError("Wrong template configuration")
2737

    
2738
    names = _GenerateUniqueNames(cfg, [".sda", ".sdb"])
2739
    sda_dev = objects.Disk(dev_type=constants.LD_LV, size=disk_sz,
2740
                           logical_id=(vgname, names[0]),
2741
                           iv_name = "sda")
2742
    sdb_dev = objects.Disk(dev_type=constants.LD_LV, size=swap_sz,
2743
                           logical_id=(vgname, names[1]),
2744
                           iv_name = "sdb")
2745
    disks = [sda_dev, sdb_dev]
2746
  elif template_name == constants.DT_DRBD8:
2747
    if len(secondary_nodes) != 1:
2748
      raise errors.ProgrammerError("Wrong template configuration")
2749
    remote_node = secondary_nodes[0]
2750
    names = _GenerateUniqueNames(cfg, [".sda_data", ".sda_meta",
2751
                                       ".sdb_data", ".sdb_meta"])
2752
    drbd_sda_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
2753
                                         disk_sz, names[0:2], "sda")
2754
    drbd_sdb_dev = _GenerateDRBD8Branch(cfg, primary_node, remote_node,
2755
                                         swap_sz, names[2:4], "sdb")
2756
    disks = [drbd_sda_dev, drbd_sdb_dev]
2757
  elif template_name == constants.DT_FILE:
2758
    if len(secondary_nodes) != 0:
2759
      raise errors.ProgrammerError("Wrong template configuration")
2760

    
2761
    file_sda_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk_sz,
2762
                                iv_name="sda", logical_id=(file_driver,
2763
                                "%s/sda" % file_storage_dir))
2764
    file_sdb_dev = objects.Disk(dev_type=constants.LD_FILE, size=swap_sz,
2765
                                iv_name="sdb", logical_id=(file_driver,
2766
                                "%s/sdb" % file_storage_dir))
2767
    disks = [file_sda_dev, file_sdb_dev]
2768
  else:
2769
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
2770
  return disks
2771

    
2772

    
2773
def _GetInstanceInfoText(instance):
2774
  """Compute that text that should be added to the disk's metadata.
2775

2776
  """
2777
  return "originstname+%s" % instance.name
2778

    
2779

    
2780
def _CreateDisks(cfg, instance):
2781
  """Create all disks for an instance.
2782

2783
  This abstracts away some work from AddInstance.
2784

2785
  Args:
2786
    instance: the instance object
2787

2788
  Returns:
2789
    True or False showing the success of the creation process
2790

2791
  """
2792
  info = _GetInstanceInfoText(instance)
2793

    
2794
  if instance.disk_template == constants.DT_FILE:
2795
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
2796
    result = rpc.call_file_storage_dir_create(instance.primary_node,
2797
                                              file_storage_dir)
2798

    
2799
    if not result:
2800
      logger.Error("Could not connect to node '%s'" % instance.primary_node)
2801
      return False
2802

    
2803
    if not result[0]:
2804
      logger.Error("failed to create directory '%s'" % file_storage_dir)
2805
      return False
2806

    
2807
  for device in instance.disks:
2808
    logger.Info("creating volume %s for instance %s" %
2809
                (device.iv_name, instance.name))
2810
    #HARDCODE
2811
    for secondary_node in instance.secondary_nodes:
2812
      if not _CreateBlockDevOnSecondary(cfg, secondary_node, instance,
2813
                                        device, False, info):
2814
        logger.Error("failed to create volume %s (%s) on secondary node %s!" %
2815
                     (device.iv_name, device, secondary_node))
2816
        return False
2817
    #HARDCODE
2818
    if not _CreateBlockDevOnPrimary(cfg, instance.primary_node,
2819
                                    instance, device, info):
2820
      logger.Error("failed to create volume %s on primary!" %
2821
                   device.iv_name)
2822
      return False
2823

    
2824
  return True
2825

    
2826

    
2827
def _RemoveDisks(instance, cfg):
2828
  """Remove all disks for an instance.
2829

2830
  This abstracts away some work from `AddInstance()` and
2831
  `RemoveInstance()`. Note that in case some of the devices couldn't
2832
  be removed, the removal will continue with the other ones (compare
2833
  with `_CreateDisks()`).
2834

2835
  Args:
2836
    instance: the instance object
2837

2838
  Returns:
2839
    True or False showing the success of the removal process
2840

2841
  """
2842
  logger.Info("removing block devices for instance %s" % instance.name)
2843

    
2844
  result = True
2845
  for device in instance.disks:
2846
    for node, disk in device.ComputeNodeTree(instance.primary_node):
2847
      cfg.SetDiskID(disk, node)
2848
      if not rpc.call_blockdev_remove(node, disk):
2849
        logger.Error("could not remove block device %s on node %s,"
2850
                     " continuing anyway" %
2851
                     (device.iv_name, node))
2852
        result = False
2853

    
2854
  if instance.disk_template == constants.DT_FILE:
2855
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
2856
    if not rpc.call_file_storage_dir_remove(instance.primary_node,
2857
                                            file_storage_dir):
2858
      logger.Error("could not remove directory '%s'" % file_storage_dir)
2859
      result = False
2860

    
2861
  return result
2862

    
2863

    
2864
def _ComputeDiskSize(disk_template, disk_size, swap_size):
2865
  """Compute disk size requirements in the volume group
2866

2867
  This is currently hard-coded for the two-drive layout.
2868

2869
  """
2870
  # Required free disk space as a function of disk and swap space
2871
  req_size_dict = {
2872
    constants.DT_DISKLESS: None,
2873
    constants.DT_PLAIN: disk_size + swap_size,
2874
    # 256 MB are added for drbd metadata, 128MB for each drbd device
2875
    constants.DT_DRBD8: disk_size + swap_size + 256,
2876
    constants.DT_FILE: None,
2877
  }
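  # Worked example (sizes chosen for illustration only): a drbd8 instance
  # with a 10240 MiB disk and 2048 MiB of swap needs
  # 10240 + 2048 + 256 = 12544 MiB of free space in the volume group.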
2878

    
2879
  if disk_template not in req_size_dict:
2880
    raise errors.ProgrammerError("Disk template '%s' size requirement"
2881
                                 " is unknown" %  disk_template)
2882

    
2883
  return req_size_dict[disk_template]
2884

    
2885

    
2886
class LUCreateInstance(LogicalUnit):
2887
  """Create an instance.
2888

2889
  """
2890
  HPATH = "instance-add"
2891
  HTYPE = constants.HTYPE_INSTANCE
2892
  _OP_REQP = ["instance_name", "mem_size", "disk_size",
2893
              "disk_template", "swap_size", "mode", "start", "vcpus",
2894
              "wait_for_sync", "ip_check", "mac"]
2895

    
2896
  def _RunAllocator(self):
2897
    """Run the allocator based on input opcode.
2898

2899
    """
2900
    disks = [{"size": self.op.disk_size, "mode": "w"},
2901
             {"size": self.op.swap_size, "mode": "w"}]
2902
    nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
2903
             "bridge": self.op.bridge}]
2904
    ial = IAllocator(self.cfg, self.sstore,
2905
                     mode=constants.IALLOCATOR_MODE_ALLOC,
2906
                     name=self.op.instance_name,
2907
                     disk_template=self.op.disk_template,
2908
                     tags=[],
2909
                     os=self.op.os_type,
2910
                     vcpus=self.op.vcpus,
2911
                     mem_size=self.op.mem_size,
2912
                     disks=disks,
2913
                     nics=nics,
2914
                     )
2915

    
2916
    ial.Run(self.op.iallocator)
2917

    
2918
    if not ial.success:
2919
      raise errors.OpPrereqError("Can't compute nodes using"
2920
                                 " iallocator '%s': %s" % (self.op.iallocator,
2921
                                                           ial.info))
2922
    if len(ial.nodes) != ial.required_nodes:
2923
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
2924
                                 " of nodes (%s), required %s" %
2925
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
2926
    self.op.pnode = ial.nodes[0]
2927
    logger.ToStdout("Selected nodes for the instance: %s" %
2928
                    (", ".join(ial.nodes),))
2929
    logger.Info("Selected nodes for instance %s via iallocator %s: %s" %
2930
                (self.op.instance_name, self.op.iallocator, ial.nodes))
2931
    if ial.required_nodes == 2:
2932
      self.op.snode = ial.nodes[1]
2933

    
2934
  def BuildHooksEnv(self):
2935
    """Build hooks env.
2936

2937
    This runs on master, primary and secondary nodes of the instance.
2938

2939
    """
2940
    env = {
2941
      "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
2942
      "INSTANCE_DISK_SIZE": self.op.disk_size,
2943
      "INSTANCE_SWAP_SIZE": self.op.swap_size,
2944
      "INSTANCE_ADD_MODE": self.op.mode,
2945
      }
2946
    if self.op.mode == constants.INSTANCE_IMPORT:
2947
      env["INSTANCE_SRC_NODE"] = self.op.src_node
2948
      env["INSTANCE_SRC_PATH"] = self.op.src_path
2949
      env["INSTANCE_SRC_IMAGE"] = self.src_image
2950

    
2951
    env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
2952
      primary_node=self.op.pnode,
2953
      secondary_nodes=self.secondaries,
2954
      status=self.instance_status,
2955
      os_type=self.op.os_type,
2956
      memory=self.op.mem_size,
2957
      vcpus=self.op.vcpus,
2958
      nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
2959
    ))
2960

    
2961
    nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
2962
          self.secondaries)
2963
    return env, nl, nl
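    # Added note (not part of the original source): by LogicalUnit
    # convention the returned triple is (hook environment, nodes for the
    # pre-phase, nodes for the post-phase); here both phases run on the
    # master, the primary and any secondaries.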
2964

    
2965

    
2966
  def CheckPrereq(self):
2967
    """Check prerequisites.
2968

2969
    """
2970
    # set optional parameters to none if they don't exist
2971
    for attr in ["kernel_path", "initrd_path", "hvm_boot_order", "pnode",
2972
                 "iallocator", "hvm_acpi", "hvm_pae", "hvm_cdrom_image_path",
2973
                 "vnc_bind_address"]:
2974
      if not hasattr(self.op, attr):
2975
        setattr(self.op, attr, None)
2976

    
2977
    if self.op.mode not in (constants.INSTANCE_CREATE,
2978
                            constants.INSTANCE_IMPORT):
2979
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
2980
                                 self.op.mode)
2981

    
2982
    if (not self.cfg.GetVGName() and
2983
        self.op.disk_template not in constants.DTS_NOT_LVM):
2984
      raise errors.OpPrereqError("Cluster does not support lvm-based"
2985
                                 " instances")
2986

    
2987
    if self.op.mode == constants.INSTANCE_IMPORT:
2988
      src_node = getattr(self.op, "src_node", None)
2989
      src_path = getattr(self.op, "src_path", None)
2990
      if src_node is None or src_path is None:
2991
        raise errors.OpPrereqError("Importing an instance requires source"
2992
                                   " node and path options")
2993
      src_node_full = self.cfg.ExpandNodeName(src_node)
2994
      if src_node_full is None:
2995
        raise errors.OpPrereqError("Unknown source node '%s'" % src_node)
2996
      self.op.src_node = src_node = src_node_full
2997

    
2998
      if not os.path.isabs(src_path):
2999
        raise errors.OpPrereqError("The source path must be absolute")
3000

    
3001
      export_info = rpc.call_export_info(src_node, src_path)
3002

    
3003
      if not export_info:
3004
        raise errors.OpPrereqError("No export found in dir %s" % src_path)
3005

    
3006
      if not export_info.has_section(constants.INISECT_EXP):
3007
        raise errors.ProgrammerError("Corrupted export config")
3008

    
3009
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
3010
      if (int(ei_version) != constants.EXPORT_VERSION):
3011
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
3012
                                   (ei_version, constants.EXPORT_VERSION))
3013

    
3014
      if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
3015
        raise errors.OpPrereqError("Can't import instance with more than"
3016
                                   " one data disk")
3017

    
3018
      # FIXME: are the old os-es, disk sizes, etc. useful?
3019
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
3020
      diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
3021
                                                         'disk0_dump'))
3022
      self.src_image = diskimage
3023
    else: # INSTANCE_CREATE
3024
      if getattr(self.op, "os_type", None) is None:
3025
        raise errors.OpPrereqError("No guest OS specified")
3026

    
3027
    #### instance parameters check
3028

    
3029
    # disk template and mirror node verification
3030
    if self.op.disk_template not in constants.DISK_TEMPLATES:
3031
      raise errors.OpPrereqError("Invalid disk template name")
3032

    
3033
    # instance name verification
3034
    hostname1 = utils.HostInfo(self.op.instance_name)
3035

    
3036
    self.op.instance_name = instance_name = hostname1.name
3037
    instance_list = self.cfg.GetInstanceList()
3038
    if instance_name in instance_list:
3039
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3040
                                 instance_name)
3041

    
3042
    # ip validity checks
3043
    ip = getattr(self.op, "ip", None)
3044
    if ip is None or ip.lower() == "none":
3045
      inst_ip = None
3046
    elif ip.lower() == "auto":
3047
      inst_ip = hostname1.ip
3048
    else:
3049
      if not utils.IsValidIP(ip):
3050
        raise errors.OpPrereqError("given IP address '%s' doesn't look"
3051
                                   " like a valid IP" % ip)
3052
      inst_ip = ip
3053
    self.inst_ip = self.op.ip = inst_ip
3054

    
3055
    if self.op.start and not self.op.ip_check:
3056
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
3057
                                 " adding an instance in start mode")
3058

    
3059
    if self.op.ip_check:
3060
      if utils.TcpPing(hostname1.ip, constants.DEFAULT_NODED_PORT):
3061
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3062
                                   (hostname1.ip, instance_name))
3063

    
3064
    # MAC address verification
3065
    if self.op.mac != "auto":
3066
      if not utils.IsValidMac(self.op.mac.lower()):
3067
        raise errors.OpPrereqError("invalid MAC address specified: %s" %
3068
                                   self.op.mac)
3069

    
3070
    # bridge verification
3071
    bridge = getattr(self.op, "bridge", None)
3072
    if bridge is None:
3073
      self.op.bridge = self.cfg.GetDefBridge()
3074
    else:
3075
      self.op.bridge = bridge
3076

    
3077
    # boot order verification
3078
    if self.op.hvm_boot_order is not None:
3079
      if len(self.op.hvm_boot_order.strip("acdn")) != 0:
3080
        raise errors.OpPrereqError("invalid boot order specified,"
3081
                                   " must be one or more of [acdn]")
3082
    # file storage checks
3083
    if (self.op.file_driver and
3084
        not self.op.file_driver in constants.FILE_DRIVER):
3085
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
3086
                                 self.op.file_driver)
3087

    
3088
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
3089
      raise errors.OpPrereqError("File storage directory not a relative"
3090
                                 " path")
3091
    #### allocator run
3092

    
3093
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
3094
      raise errors.OpPrereqError("One and only one of iallocator and primary"
3095
                                 " node must be given")
3096

    
3097
    if self.op.iallocator is not None:
3098
      self._RunAllocator()
3099

    
3100
    #### node related checks
3101

    
3102
    # check primary node
3103
    pnode = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.pnode))
3104
    if pnode is None:
3105
      raise errors.OpPrereqError("Primary node '%s' is unknown" %
3106
                                 self.op.pnode)
3107
    self.op.pnode = pnode.name
3108
    self.pnode = pnode
3109
    self.secondaries = []
3110

    
3111
    # mirror node verification
3112
    if self.op.disk_template in constants.DTS_NET_MIRROR:
3113
      if getattr(self.op, "snode", None) is None:
3114
        raise errors.OpPrereqError("The networked disk templates need"
3115
                                   " a mirror node")
3116

    
3117
      snode_name = self.cfg.ExpandNodeName(self.op.snode)
3118
      if snode_name is None:
3119
        raise errors.OpPrereqError("Unknown secondary node '%s'" %
3120
                                   self.op.snode)
3121
      elif snode_name == pnode.name:
3122
        raise errors.OpPrereqError("The secondary node cannot be"
3123
                                   " the primary node.")
3124
      self.secondaries.append(snode_name)
3125

    
3126
    req_size = _ComputeDiskSize(self.op.disk_template,
3127
                                self.op.disk_size, self.op.swap_size)
3128

    
3129
    # Check lv size requirements
3130
    if req_size is not None:
3131
      nodenames = [pnode.name] + self.secondaries
3132
      nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
3133
      for node in nodenames:
3134
        info = nodeinfo.get(node, None)
3135
        if not info:
3136
          raise errors.OpPrereqError("Cannot get current information"
3137
                                     " from node '%s'" % node)
3138
        vg_free = info.get('vg_free', None)
3139
        if not isinstance(vg_free, int):
3140
          raise errors.OpPrereqError("Can't compute free disk space on"
3141
                                     " node %s" % node)
3142
        if req_size > info['vg_free']:
3143
          raise errors.OpPrereqError("Not enough disk space on target node %s."
3144
                                     " %d MB available, %d MB required" %
3145
                                     (node, info['vg_free'], req_size))
3146

    
3147
    # os verification
3148
    os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
3149
    if not os_obj:
3150
      raise errors.OpPrereqError("OS '%s' not in supported os list for"
3151
                                 " primary node"  % self.op.os_type)
3152

    
3153
    if self.op.kernel_path == constants.VALUE_NONE:
3154
      raise errors.OpPrereqError("Can't set instance kernel to none")
3155

    
3156

    
3157
    # bridge check on primary node
3158
    if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
3159
      raise errors.OpPrereqError("target bridge '%s' does not exist on"
3160
                                 " destination node '%s'" %
3161
                                 (self.op.bridge, pnode.name))
3162

    
3163
    # memory check on primary node
3164
    if self.op.start:
3165
      _CheckNodeFreeMemory(self.cfg, self.pnode.name,
3166
                           "creating instance %s" % self.op.instance_name,
3167
                           self.op.mem_size)
3168

    
3169
    # hvm_cdrom_image_path verification
3170
    if self.op.hvm_cdrom_image_path is not None:
3171
      if not os.path.isabs(self.op.hvm_cdrom_image_path):
3172
        raise errors.OpPrereqError("The path to the HVM CDROM image must"
3173
                                   " be an absolute path or None, not %s" %
3174
                                   self.op.hvm_cdrom_image_path)
3175
      if not os.path.isfile(self.op.hvm_cdrom_image_path):
3176
        raise errors.OpPrereqError("The HVM CDROM image must either be a"
3177
                                   " regular file or a symlink pointing to"
3178
                                   " an existing regular file, not %s" %
3179
                                   self.op.hvm_cdrom_image_path)
3180

    
3181
    # vnc_bind_address verification
3182
    if self.op.vnc_bind_address is not None:
3183
      if not utils.IsValidIP(self.op.vnc_bind_address):
3184
        raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
3185
                                   " like a valid IP address" %
3186
                                   self.op.vnc_bind_address)
3187

    
3188
    if self.op.start:
3189
      self.instance_status = 'up'
3190
    else:
3191
      self.instance_status = 'down'
3192

    
3193
  def Exec(self, feedback_fn):
3194
    """Create and add the instance to the cluster.
3195

3196
    """
3197
    instance = self.op.instance_name
3198
    pnode_name = self.pnode.name
3199

    
3200
    if self.op.mac == "auto":
3201
      mac_address = self.cfg.GenerateMAC()
3202
    else:
3203
      mac_address = self.op.mac
3204

    
3205
    nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
3206
    if self.inst_ip is not None:
3207
      nic.ip = self.inst_ip
3208

    
3209
    ht_kind = self.sstore.GetHypervisorType()
3210
    if ht_kind in constants.HTS_REQ_PORT:
3211
      network_port = self.cfg.AllocatePort()
3212
    else:
3213
      network_port = None
3214

    
3215
    if self.op.vnc_bind_address is None:
3216
      self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
3217

    
3218
    # this is needed because os.path.join does not accept None arguments
3219
    if self.op.file_storage_dir is None:
3220
      string_file_storage_dir = ""
3221
    else:
3222
      string_file_storage_dir = self.op.file_storage_dir
3223

    
3224
    # build the full file storage dir path
3225
    file_storage_dir = os.path.normpath(os.path.join(
3226
                                        self.sstore.GetFileStorageDir(),
3227
                                        string_file_storage_dir, instance))
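    # Added note (not part of the original source): with an empty
    # file_storage_dir this normally resolves to
    #   <cluster file storage dir>/<instance name>
    # e.g. something like /srv/ganeti/file-storage/inst1.example.com -- the
    # example base path is only an assumption, the real one is whatever
    # GetFileStorageDir() reports.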
3228

    
3229

    
3230
    disks = _GenerateDiskTemplate(self.cfg,
3231
                                  self.op.disk_template,
3232
                                  instance, pnode_name,
3233
                                  self.secondaries, self.op.disk_size,
3234
                                  self.op.swap_size,
3235
                                  file_storage_dir,
3236
                                  self.op.file_driver)
3237

    
3238
    iobj = objects.Instance(name=instance, os=self.op.os_type,
3239
                            primary_node=pnode_name,
3240
                            memory=self.op.mem_size,
3241
                            vcpus=self.op.vcpus,
3242
                            nics=[nic], disks=disks,
3243
                            disk_template=self.op.disk_template,
3244
                            status=self.instance_status,
3245
                            network_port=network_port,
3246
                            kernel_path=self.op.kernel_path,
3247
                            initrd_path=self.op.initrd_path,
3248
                            hvm_boot_order=self.op.hvm_boot_order,
3249
                            hvm_acpi=self.op.hvm_acpi,
3250
                            hvm_pae=self.op.hvm_pae,
3251
                            hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
3252
                            vnc_bind_address=self.op.vnc_bind_address,
3253
                            )
3254

    
3255
    feedback_fn("* creating instance disks...")
3256
    if not _CreateDisks(self.cfg, iobj):
3257
      _RemoveDisks(iobj, self.cfg)
3258
      raise errors.OpExecError("Device creation failed, reverting...")
3259

    
3260
    feedback_fn("adding instance %s to cluster config" % instance)
3261

    
3262
    self.cfg.AddInstance(iobj)
3263

    
3264
    if self.op.wait_for_sync:
3265
      disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
3266
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
3267
      # make sure the disks are not degraded (still sync-ing is ok)
3268
      time.sleep(15)
3269
      feedback_fn("* checking mirrors status")
3270
      disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True)
3271
    else:
3272
      disk_abort = False
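    # Added note (not part of the original source): the three branches above
    # implement the wait_for_sync policy -- either wait for a full sync, or
    # (for the network mirrored templates) only verify after a short delay
    # that the mirrors are not degraded, or skip the check entirely.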
3273

    
3274
    if disk_abort:
3275
      _RemoveDisks(iobj, self.cfg)
3276
      self.cfg.RemoveInstance(iobj.name)
3277
      raise errors.OpExecError("There are some degraded disks for"
3278
                               " this instance")
3279

    
3280
    feedback_fn("creating os for instance %s on node %s" %
3281
                (instance, pnode_name))
3282

    
3283
    if iobj.disk_template != constants.DT_DISKLESS:
3284
      if self.op.mode == constants.INSTANCE_CREATE:
3285
        feedback_fn("* running the instance OS create scripts...")
3286
        if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
3287
          raise errors.OpExecError("could not add os for instance %s"
3288
                                   " on node %s" %
3289
                                   (instance, pnode_name))
3290

    
3291
      elif self.op.mode == constants.INSTANCE_IMPORT:
3292
        feedback_fn("* running the instance OS import scripts...")
3293
        src_node = self.op.src_node
3294
        src_image = self.src_image
3295
        if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
3296
                                           src_node, src_image):
3297
          raise errors.OpExecError("Could not import os for instance"
3298
                                   " %s on node %s" %
3299
                                   (instance, pnode_name))
3300
      else:
3301
        # also checked in the prereq part
3302
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
3303
                                     % self.op.mode)
3304

    
3305
    if self.op.start:
3306
      logger.Info("starting instance %s on node %s" % (instance, pnode_name))
3307
      feedback_fn("* starting instance...")
3308
      if not rpc.call_instance_start(pnode_name, iobj, None):
3309
        raise errors.OpExecError("Could not start instance")
3310

    
3311

    
3312
class LUConnectConsole(NoHooksLU):
3313
  """Connect to an instance's console.
3314

3315
  This is somewhat special in that it returns the command line that
3316
  you need to run on the master node in order to connect to the
3317
  console.
3318

3319
  """
3320
  _OP_REQP = ["instance_name"]
3321

    
3322
  def CheckPrereq(self):
3323
    """Check prerequisites.
3324

3325
    This checks that the instance is in the cluster.
3326

3327
    """
3328
    instance = self.cfg.GetInstanceInfo(
3329
      self.cfg.ExpandInstanceName(self.op.instance_name))
3330
    if instance is None:
3331
      raise errors.OpPrereqError("Instance '%s' not known" %
3332
                                 self.op.instance_name)
3333
    self.instance = instance
3334

    
3335
  def Exec(self, feedback_fn):
3336
    """Connect to the console of an instance
3337

3338
    """
3339
    instance = self.instance
3340
    node = instance.primary_node
3341

    
3342
    node_insts = rpc.call_instance_list([node])[node]
3343
    if node_insts is False:
3344
      raise errors.OpExecError("Can't connect to node %s." % node)
3345

    
3346
    if instance.name not in node_insts:
3347
      raise errors.OpExecError("Instance %s is not running." % instance.name)
3348

    
3349
    logger.Debug("connecting to console of %s on %s" % (instance.name, node))
3350

    
3351
    hyper = hypervisor.GetHypervisor()
3352
    console_cmd = hyper.GetShellCommandForConsole(instance)
3353

    
3354
    # build ssh cmdline
3355
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
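    # Added note (not part of the original source): the caller (the cli
    # layer) executes the returned argv itself; for the Xen hypervisors
    # console_cmd is roughly an "xm console <instance>" invocation, so the
    # net effect is an interactive ssh to the primary node running that
    # command -- the exact console command is whatever
    # GetShellCommandForConsole() returns for the configured hypervisor.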
3356

    
3357

    
3358
class LUReplaceDisks(LogicalUnit):
3359
  """Replace the disks of an instance.
3360

3361
  """
3362
  HPATH = "mirrors-replace"
3363
  HTYPE = constants.HTYPE_INSTANCE
3364
  _OP_REQP = ["instance_name", "mode", "disks"]
3365

    
3366
  def _RunAllocator(self):
3367
    """Compute a new secondary node using an IAllocator.
3368

3369
    """
3370
    ial = IAllocator(self.cfg, self.sstore,
3371
                     mode=constants.IALLOCATOR_MODE_RELOC,
3372
                     name=self.op.instance_name,
3373
                     relocate_from=[self.sec_node])
3374

    
3375
    ial.Run(self.op.iallocator)
3376

    
3377
    if not ial.success:
3378
      raise errors.OpPrereqError("Can't compute nodes using"
3379
                                 " iallocator '%s': %s" % (self.op.iallocator,
3380
                                                           ial.info))
3381
    if len(ial.nodes) != ial.required_nodes:
3382
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
3383
                                 " of nodes (%s), required %s" %
3384
                                 (len(ial.nodes), ial.required_nodes))
3385
    self.op.remote_node = ial.nodes[0]
3386
    logger.ToStdout("Selected new secondary for the instance: %s" %
3387
                    self.op.remote_node)
3388

    
3389
  def BuildHooksEnv(self):
3390
    """Build hooks env.
3391

3392
    This runs on the master, the primary and all the secondaries.
3393

3394
    """
3395
    env = {
3396
      "MODE": self.op.mode,
3397
      "NEW_SECONDARY": self.op.remote_node,
3398
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
3399
      }
3400
    env.update(_BuildInstanceHookEnvByObject(self.instance))
3401
    nl = [
3402
      self.sstore.GetMasterNode(),
3403
      self.instance.primary_node,
3404
      ]
3405
    if self.op.remote_node is not None:
3406
      nl.append(self.op.remote_node)
3407
    return env, nl, nl
3408

    
3409
  def CheckPrereq(self):
3410
    """Check prerequisites.
3411

3412
    This checks that the instance is in the cluster.
3413

3414
    """
3415
    if not hasattr(self.op, "remote_node"):
3416
      self.op.remote_node = None
3417

    
3418
    instance = self.cfg.GetInstanceInfo(
3419
      self.cfg.ExpandInstanceName(self.op.instance_name))
3420
    if instance is None:
3421
      raise errors.OpPrereqError("Instance '%s' not known" %
3422
                                 self.op.instance_name)
3423
    self.instance = instance
3424
    self.op.instance_name = instance.name
3425

    
3426
    if instance.disk_template not in constants.DTS_NET_MIRROR:
3427
      raise errors.OpPrereqError("Instance's disk layout is not"
3428
                                 " network mirrored.")
3429

    
3430
    if len(instance.secondary_nodes) != 1:
3431
      raise errors.OpPrereqError("The instance has a strange layout,"
3432
                                 " expected one secondary but found %d" %
3433
                                 len(instance.secondary_nodes))
3434

    
3435
    self.sec_node = instance.secondary_nodes[0]
3436

    
3437
    ia_name = getattr(self.op, "iallocator", None)
3438
    if ia_name is not None:
3439
      if self.op.remote_node is not None:
3440
        raise errors.OpPrereqError("Give either the iallocator or the new"
3441
                                   " secondary, not both")
3442
      self.op.remote_node = self._RunAllocator()
3443

    
3444
    remote_node = self.op.remote_node
3445
    if remote_node is not None:
3446
      remote_node = self.cfg.ExpandNodeName(remote_node)
3447
      if remote_node is None:
3448
        raise errors.OpPrereqError("Node '%s' not known" %
3449
                                   self.op.remote_node)
3450
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
3451
    else:
3452
      self.remote_node_info = None
3453
    if remote_node == instance.primary_node:
3454
      raise errors.OpPrereqError("The specified node is the primary node of"
3455
                                 " the instance.")
3456
    elif remote_node == self.sec_node:
3457
      if self.op.mode == constants.REPLACE_DISK_SEC:
3458
        # this is for DRBD8, where we can't execute the same mode of
3459
        # replacement as for drbd7 (no different port allocated)
3460
        raise errors.OpPrereqError("Same secondary given, cannot execute"
3461
                                   " replacement")
3462
    if instance.disk_template == constants.DT_DRBD8:
3463
      if (self.op.mode == constants.REPLACE_DISK_ALL and
3464
          remote_node is not None):
3465
        # switch to replace secondary mode
3466
        self.op.mode = constants.REPLACE_DISK_SEC
3467

    
3468
      if self.op.mode == constants.REPLACE_DISK_ALL:
3469
        raise errors.OpPrereqError("Template 'drbd' only allows primary or"
3470
                                   " secondary disk replacement, not"
3471
                                   " both at once")
3472
      elif self.op.mode == constants.REPLACE_DISK_PRI:
3473
        if remote_node is not None:
3474
          raise errors.OpPrereqError("Template 'drbd' does not allow changing"
3475
                                     " the secondary while doing a primary"
3476
                                     " node disk replacement")
3477
        self.tgt_node = instance.primary_node
3478
        self.oth_node = instance.secondary_nodes[0]
3479
      elif self.op.mode == constants.REPLACE_DISK_SEC:
3480
        self.new_node = remote_node # this can be None, in which case
3481
                                    # we don't change the secondary
3482
        self.tgt_node = instance.secondary_nodes[0]
3483
        self.oth_node = instance.primary_node
3484
      else:
3485
        raise errors.ProgrammerError("Unhandled disk replace mode")
3486

    
3487
    for name in self.op.disks:
3488
      if instance.FindDisk(name) is None:
3489
        raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
3490
                                   (name, instance.name))
3491
    self.op.remote_node = remote_node
3492

    
3493
  def _ExecD8DiskOnly(self, feedback_fn):
3494
    """Replace a disk on the primary or secondary for dbrd8.
3495

3496
    The algorithm for replace is quite complicated:
3497
      - for each disk to be replaced:
3498
        - create new LVs on the target node with unique names
3499
        - detach old LVs from the drbd device
3500
        - rename old LVs to name_replaced.<time_t>
3501
        - rename new LVs to old LVs
3502
        - attach the new LVs (with the old names now) to the drbd device
3503
      - wait for sync across all devices
3504
      - for each modified disk:
3505
        - remove old LVs (which have the name name_replaced.<time_t>)
3506

3507
    Failures are not very well handled.
3508

3509
    """
3510
    steps_total = 6
3511
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3512
    instance = self.instance
3513
    iv_names = {}
3514
    vgname = self.cfg.GetVGName()
3515
    # start of work
3516
    cfg = self.cfg
3517
    tgt_node = self.tgt_node
3518
    oth_node = self.oth_node
3519

    
3520
    # Step: check device activation
3521
    self.proc.LogStep(1, steps_total, "check device existence")
3522
    info("checking volume groups")
3523
    my_vg = cfg.GetVGName()
3524
    results = rpc.call_vg_list([oth_node, tgt_node])
3525
    if not results:
3526
      raise errors.OpExecError("Can't list volume groups on the nodes")
3527
    for node in oth_node, tgt_node:
3528
      res = results.get(node, False)
3529
      if not res or my_vg not in res:
3530
        raise errors.OpExecError("Volume group '%s' not found on %s" %
3531
                                 (my_vg, node))
3532
    for dev in instance.disks:
3533
      if not dev.iv_name in self.op.disks:
3534
        continue
3535
      for node in tgt_node, oth_node:
3536
        info("checking %s on %s" % (dev.iv_name, node))
3537
        cfg.SetDiskID(dev, node)
3538
        if not rpc.call_blockdev_find(node, dev):
3539
          raise errors.OpExecError("Can't find device %s on node %s" %
3540
                                   (dev.iv_name, node))
3541

    
3542
    # Step: check other node consistency
3543
    self.proc.LogStep(2, steps_total, "check peer consistency")
3544
    for dev in instance.disks:
3545
      if not dev.iv_name in self.op.disks:
3546
        continue
3547
      info("checking %s consistency on %s" % (dev.iv_name, oth_node))
3548
      if not _CheckDiskConsistency(self.cfg, dev, oth_node,
3549
                                   oth_node==instance.primary_node):
3550
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
3551
                                 " to replace disks on this node (%s)" %
3552
                                 (oth_node, tgt_node))
3553

    
3554
    # Step: create new storage
3555
    self.proc.LogStep(3, steps_total, "allocate new storage")
3556
    for dev in instance.disks:
3557
      if not dev.iv_name in self.op.disks:
3558
        continue
3559
      size = dev.size
3560
      cfg.SetDiskID(dev, tgt_node)
3561
      lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
3562
      names = _GenerateUniqueNames(cfg, lv_names)
3563
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
3564
                             logical_id=(vgname, names[0]))
3565
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
3566
                             logical_id=(vgname, names[1]))
3567
      new_lvs = [lv_data, lv_meta]
3568
      old_lvs = dev.children
3569
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
3570
      info("creating new local storage on %s for %s" %
3571
           (tgt_node, dev.iv_name))
3572
      # since we *always* want to create this LV, we use the
3573
      # _Create...OnPrimary (which forces the creation), even if we
3574
      # are talking about the secondary node
3575
      for new_lv in new_lvs:
3576
        if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
3577
                                        _GetInstanceInfoText(instance)):
3578
          raise errors.OpExecError("Failed to create new LV named '%s' on"
3579
                                   " node '%s'" %
3580
                                   (new_lv.logical_id[1], tgt_node))
3581

    
3582
    # Step: for each lv, detach+rename*2+attach
3583
    self.proc.LogStep(4, steps_total, "change drbd configuration")
3584
    for dev, old_lvs, new_lvs in iv_names.itervalues():
3585
      info("detaching %s drbd from local storage" % dev.iv_name)
3586
      if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
3587
        raise errors.OpExecError("Can't detach drbd from local storage on node"
3588
                                 " %s for device %s" % (tgt_node, dev.iv_name))
3589
      #dev.children = []
3590
      #cfg.Update(instance)
3591

    
3592
      # ok, we created the new LVs, so now we know we have the needed
3593
      # storage; as such, we proceed on the target node to rename
3594
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
3595
      # using the assumption that logical_id == physical_id (which in
3596
      # turn is the unique_id on that node)
3597

    
3598
      # FIXME(iustin): use a better name for the replaced LVs
3599
      temp_suffix = int(time.time())
3600
      ren_fn = lambda d, suff: (d.physical_id[0],
3601
                                d.physical_id[1] + "_replaced-%s" % suff)
3602
      # build the rename list based on what LVs exist on the node
3603
      rlist = []
3604
      for to_ren in old_lvs:
3605
        find_res = rpc.call_blockdev_find(tgt_node, to_ren)
3606
        if find_res is not None: # device exists
3607
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
3608

    
3609
      info("renaming the old LVs on the target node")
3610
      if not rpc.call_blockdev_rename(tgt_node, rlist):
3611
        raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
3612
      # now we rename the new LVs to the old LVs
3613
      info("renaming the new LVs on the target node")
3614
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
3615
      if not rpc.call_blockdev_rename(tgt_node, rlist):
3616
        raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
3617

    
3618
      for old, new in zip(old_lvs, new_lvs):
3619
        new.logical_id = old.logical_id
3620
        cfg.SetDiskID(new, tgt_node)
3621

    
3622
      for disk in old_lvs:
3623
        disk.logical_id = ren_fn(disk, temp_suffix)
3624
        cfg.SetDiskID(disk, tgt_node)
3625

    
3626
      # now that the new lvs have the old name, we can add them to the device
3627
      info("adding new mirror component on %s" % tgt_node)
3628
      if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
3629
        for new_lv in new_lvs:
3630
          if not rpc.call_blockdev_remove(tgt_node, new_lv):
3631
            warning("Can't rollback device %s", hint="manually cleanup unused"
3632
                    " logical volumes")
3633
        raise errors.OpExecError("Can't add local storage to drbd")
3634

    
3635
      dev.children = new_lvs
3636
      cfg.Update(instance)
3637

    
3638
    # Step: wait for sync
3639

    
3640
    # this can fail as the old devices are degraded and _WaitForSync
3641
    # does a combined result over all disks, so we don't check its
3642
    # return value
3643
    self.proc.LogStep(5, steps_total, "sync devices")
3644
    _WaitForSync(cfg, instance, self.proc, unlock=True)
3645

    
3646
    # so check manually all the devices
3647
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
3648
      cfg.SetDiskID(dev, instance.primary_node)
3649
      is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
3650
      if is_degr:
3651
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
3652

    
3653
    # Step: remove old storage
3654
    self.proc.LogStep(6, steps_total, "removing old storage")
3655
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
3656
      info("remove logical volumes for %s" % name)
3657
      for lv in old_lvs:
3658
        cfg.SetDiskID(lv, tgt_node)
3659
        if not rpc.call_blockdev_remove(tgt_node, lv):
3660
          warning("Can't remove old LV", hint="manually remove unused LVs")
3661
          continue
3662

    
3663
  def _ExecD8Secondary(self, feedback_fn):
3664
    """Replace the secondary node for drbd8.
3665

3666
    The algorithm for replace is quite complicated:
3667
      - for all disks of the instance:
3668
        - create new LVs on the new node with same names
3669
        - shutdown the drbd device on the old secondary
3670
        - disconnect the drbd network on the primary
3671
        - create the drbd device on the new secondary
3672
        - network attach the drbd on the primary, using an artifice:
3673
          the drbd code for Attach() will connect to the network if it
3674
          finds a device which is connected to the good local disks but
3675
          not network enabled
3676
      - wait for sync across all devices
3677
      - remove all disks from the old secondary
3678

3679
    Failures are not very well handled.
3680

3681
    """
3682
    steps_total = 6
3683
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
3684
    instance = self.instance
3685
    iv_names = {}
3686
    vgname = self.cfg.GetVGName()
3687
    # start of work
3688
    cfg = self.cfg
3689
    old_node = self.tgt_node
3690
    new_node = self.new_node
3691
    pri_node = instance.primary_node
3692

    
3693
    # Step: check device activation
3694
    self.proc.LogStep(1, steps_total, "check device existence")
3695
    info("checking volume groups")
3696
    my_vg = cfg.GetVGName()
3697
    results = rpc.call_vg_list([pri_node, new_node])
3698
    if not results:
3699
      raise errors.OpExecError("Can't list volume groups on the nodes")
3700
    for node in pri_node, new_node:
3701
      res = results.get(node, False)
3702
      if not res or my_vg not in res:
3703
        raise errors.OpExecError("Volume group '%s' not found on %s" %
3704
                                 (my_vg, node))
3705
    for dev in instance.disks:
3706
      if not dev.iv_name in self.op.disks:
3707
        continue
3708
      info("checking %s on %s" % (dev.iv_name, pri_node))
3709
      cfg.SetDiskID(dev, pri_node)
3710
      if not rpc.call_blockdev_find(pri_node, dev):
3711
        raise errors.OpExecError("Can't find device %s on node %s" %
3712
                                 (dev.iv_name, pri_node))
3713

    
3714
    # Step: check other node consistency
3715
    self.proc.LogStep(2, steps_total, "check peer consistency")
3716
    for dev in instance.disks:
3717
      if not dev.iv_name in self.op.disks:
3718
        continue
3719
      info("checking %s consistency on %s" % (dev.iv_name, pri_node))
3720
      if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
3721
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
3722
                                 " unsafe to replace the secondary" %
3723
                                 pri_node)
3724

    
3725
    # Step: create new storage
3726
    self.proc.LogStep(3, steps_total, "allocate new storage")
3727
    for dev in instance.disks:
3728
      size = dev.size
3729
      info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
3730
      # since we *always* want to create this LV, we use the
3731
      # _Create...OnPrimary (which forces the creation), even if we
3732
      # are talking about the secondary node
3733
      for new_lv in dev.children:
3734
        if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
3735
                                        _GetInstanceInfoText(instance)):
3736
          raise errors.OpExecError("Failed to create new LV named '%s' on"
3737
                                   " node '%s'" %
3738
                                   (new_lv.logical_id[1], new_node))
3739

    
3740
      iv_names[dev.iv_name] = (dev, dev.children)
3741

    
3742
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
3743
    for dev in instance.disks:
3744
      size = dev.size
3745
      info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
3746
      # create new devices on new_node
3747
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
3748
                              logical_id=(pri_node, new_node,
3749
                                          dev.logical_id[2]),
3750
                              children=dev.children)
3751
      if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
3752
                                        new_drbd, False,
3753
                                        _GetInstanceInfoText(instance)):
3754
        raise errors.OpExecError("Failed to create new DRBD on"
3755
                                 " node '%s'" % new_node)
3756

    
3757
    for dev in instance.disks:
3758
      # we have new devices, shutdown the drbd on the old secondary
3759
      info("shutting down drbd for %s on old node" % dev.iv_name)
3760
      cfg.SetDiskID(dev, old_node)
3761
      if not rpc.call_blockdev_shutdown(old_node, dev):
3762
        warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
3763
                hint="Please cleanup this device manually as soon as possible")
3764

    
3765
    info("detaching primary drbds from the network (=> standalone)")
3766
    done = 0
3767
    for dev in instance.disks:
3768
      cfg.SetDiskID(dev, pri_node)
3769
      # set the physical (unique in bdev terms) id to None, meaning
3770
      # detach from network
3771
      dev.physical_id = (None,) * len(dev.physical_id)
3772
      # and 'find' the device, which will 'fix' it to match the
3773
      # standalone state
3774
      if rpc.call_blockdev_find(pri_node, dev):
3775
        done += 1
3776
      else:
3777
        warning("Failed to detach drbd %s from network, unusual case" %
3778
                dev.iv_name)
3779

    
3780
    if not done:
3781
      # no detaches succeeded (very unlikely)
3782
      raise errors.OpExecError("Can't detach at least one DRBD from old node")
3783

    
3784
    # if we managed to detach at least one, we update all the disks of
3785
    # the instance to point to the new secondary
3786
    info("updating instance configuration")
3787
    for dev in instance.disks:
3788
      dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
3789
      cfg.SetDiskID(dev, pri_node)
3790
    cfg.Update(instance)
3791

    
3792
    # and now perform the drbd attach
3793
    info("attaching primary drbds to new secondary (standalone => connected)")
3794
    failures = []
3795
    for dev in instance.disks:
3796
      info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
3797
      # since the attach is smart, it's enough to 'find' the device,
3798
      # it will automatically activate the network, if the physical_id
3799
      # is correct
3800
      cfg.SetDiskID(dev, pri_node)
3801
      if not rpc.call_blockdev_find(pri_node, dev):
3802
        warning("can't attach drbd %s to new secondary!" % dev.iv_name,
3803
                "please do a gnt-instance info to see the status of disks")
3804

    
3805
    # this can fail as the old devices are degraded and _WaitForSync
3806
    # does a combined result over all disks, so we don't check its
3807
    # return value
3808
    self.proc.LogStep(5, steps_total, "sync devices")
3809
    _WaitForSync(cfg, instance, self.proc, unlock=True)
3810

    
3811
    # so check manually all the devices
3812
    for name, (dev, old_lvs) in iv_names.iteritems():
3813
      cfg.SetDiskID(dev, pri_node)
3814
      is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
3815
      if is_degr:
3816
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
3817

    
3818
    self.proc.LogStep(6, steps_total, "removing old storage")
3819
    for name, (dev, old_lvs) in iv_names.iteritems():
3820
      info("remove logical volumes for %s" % name)
3821
      for lv in old_lvs:
3822
        cfg.SetDiskID(lv, old_node)
3823
        if not rpc.call_blockdev_remove(old_node, lv):
3824
          warning("Can't remove LV on old secondary",
3825
                  hint="Cleanup stale volumes by hand")
3826

    
3827
  def Exec(self, feedback_fn):
3828
    """Execute disk replacement.
3829

3830
    This dispatches the disk replacement to the appropriate handler.
3831

3832
    """
3833
    instance = self.instance
3834

    
3835
    # Activate the instance disks if we're replacing them on a down instance
3836
    if instance.status == "down":
3837
      op = opcodes.OpActivateInstanceDisks(instance_name=instance.name)
3838
      self.proc.ChainOpCode(op)
3839

    
3840
    if instance.disk_template == constants.DT_DRBD8:
3841
      if self.op.remote_node is None:
3842
        fn = self._ExecD8DiskOnly
3843
      else:
3844
        fn = self._ExecD8Secondary
3845
    else:
3846
      raise errors.ProgrammerError("Unhandled disk replacement case")
3847

    
3848
    ret = fn(feedback_fn)
3849

    
3850
    # Deactivate the instance disks if we're replacing them on a down instance
3851
    if instance.status == "down":
3852
      op = opcodes.OpDeactivateInstanceDisks(instance_name=instance.name)
3853
      self.proc.ChainOpCode(op)
3854

    
3855
    return ret
3856

    
3857

    
3858
class LUGrowDisk(LogicalUnit):
3859
  """Grow a disk of an instance.
3860

3861
  """
3862
  HPATH = "disk-grow"
3863
  HTYPE = constants.HTYPE_INSTANCE
3864
  _OP_REQP = ["instance_name", "disk", "amount"]
3865

    
3866
  def BuildHooksEnv(self):
3867
    """Build hooks env.
3868

3869
    This runs on the master, the primary and all the secondaries.
3870

3871
    """
3872
    env = {
3873
      "DISK": self.op.disk,
3874
      "AMOUNT": self.op.amount,
3875
      }
3876
    env.update(_BuildInstanceHookEnvByObject(self.instance))
3877
    nl = [
3878
      self.sstore.GetMasterNode(),
3879
      self.instance.primary_node,
3880
      ]
3881
    return env, nl, nl
3882

    
3883
  def CheckPrereq(self):
3884
    """Check prerequisites.
3885

3886
    This checks that the instance is in the cluster.
3887

3888
    """
3889
    instance = self.cfg.GetInstanceInfo(
3890
      self.cfg.ExpandInstanceName(self.op.instance_name))
3891
    if instance is None:
3892
      raise errors.OpPrereqError("Instance '%s' not known" %
3893
                                 self.op.instance_name)
3894
    self.instance = instance
3895
    self.op.instance_name = instance.name
3896

    
3897
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
3898
      raise errors.OpPrereqError("Instance's disk layout does not support"
3899
                                 " growing.")
3900

    
3901
    if instance.FindDisk(self.op.disk) is None:
3902
      raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
3903
                                 (self.op.disk, instance.name))
3904

    
3905
    nodenames = [instance.primary_node] + list(instance.secondary_nodes)
3906
    nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
3907
    for node in nodenames:
3908
      info = nodeinfo.get(node, None)
3909
      if not info:
3910
        raise errors.OpPrereqError("Cannot get current information"
3911
                                   " from node '%s'" % node)
3912
      vg_free = info.get('vg_free', None)
3913
      if not isinstance(vg_free, int):
3914
        raise errors.OpPrereqError("Can't compute free disk space on"
3915
                                   " node %s" % node)
3916
      if self.op.amount > info['vg_free']:
3917
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
3918
                                   " %d MiB available, %d MiB required" %
3919
                                   (node, info['vg_free'], self.op.amount))
3920

    
3921
  def Exec(self, feedback_fn):
3922
    """Execute disk grow.
3923

3924
    """
3925
    instance = self.instance
3926
    disk = instance.FindDisk(self.op.disk)
3927
    for node in (instance.secondary_nodes + (instance.primary_node,)):
3928
      self.cfg.SetDiskID(disk, node)
3929
      result = rpc.call_blockdev_grow(node, disk, self.op.amount)
3930
      if not result or not isinstance(result, tuple) or len(result) != 2:
3931
        raise errors.OpExecError("grow request failed to node %s" % node)
3932
      elif not result[0]:
3933
        raise errors.OpExecError("grow request failed to node %s: %s" %
3934
                                 (node, result[1]))
3935
    disk.RecordGrow(self.op.amount)
3936
    self.cfg.Update(instance)
3937
    return
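    # Added note (not part of the original source): self.op.amount is a
    # delta, not a new total size -- it is checked against each node's
    # 'vg_free' in CheckPrereq, submitted to the secondaries first and the
    # primary last, and only then recorded in the configuration via
    # disk.RecordGrow()/self.cfg.Update().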
3938

    
3939

    
3940
class LUQueryInstanceData(NoHooksLU):
3941
  """Query runtime instance data.
3942

3943
  """
3944
  _OP_REQP = ["instances"]
3945

    
3946
  def CheckPrereq(self):
3947
    """Check prerequisites.
3948

3949
    This only checks the optional instance list against the existing names.
3950

3951
    """
3952
    if not isinstance(self.op.instances, list):
3953
      raise errors.OpPrereqError("Invalid argument type 'instances'")
3954
    if self.op.instances:
3955
      self.wanted_instances = []
3956
      names = self.op.instances
3957
      for name in names:
3958
        instance = self.cfg.GetInstanceInfo(self.cfg.ExpandInstanceName(name))
3959
        if instance is None:
3960
          raise errors.OpPrereqError("No such instance name '%s'" % name)
3961
        self.wanted_instances.append(instance)
3962
    else:
3963
      self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3964
                               in self.cfg.GetInstanceList()]
3965
    return
3966

    
3967

    
3968
  def _ComputeDiskStatus(self, instance, snode, dev):
3969
    """Compute block device status.
3970

3971
    """
3972
    self.cfg.SetDiskID(dev, instance.primary_node)
3973
    dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
3974
    if dev.dev_type in constants.LDS_DRBD:
3975
      # we change the snode then (otherwise we use the one passed in)
3976
      if dev.logical_id[0] == instance.primary_node:
3977
        snode = dev.logical_id[1]
3978
      else:
3979
        snode = dev.logical_id[0]
3980

    
3981
    if snode:
3982
      self.cfg.SetDiskID(dev, snode)
3983
      dev_sstatus = rpc.call_blockdev_find(snode, dev)
3984
    else:
3985
      dev_sstatus = None
3986

    
3987
    if dev.children:
3988
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
3989
                      for child in dev.children]
3990
    else:
3991
      dev_children = []
3992

    
3993
    data = {
3994
      "iv_name": dev.iv_name,
3995
      "dev_type": dev.dev_type,
3996
      "logical_id": dev.logical_id,
3997
      "physical_id": dev.physical_id,
3998
      "pstatus": dev_pstatus,
3999
      "sstatus": dev_sstatus,
4000
      "children": dev_children,
4001
      }
4002

    
4003
    return data
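    # Added note (not part of the original source): 'pstatus' and 'sstatus'
    # are the raw call_blockdev_find results from the primary and secondary
    # node (elsewhere in this module element 5 of that result is used as the
    # "degraded" flag), and 'children' nests the same structure recursively
    # for the backing devices.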
4004

    
4005
  def Exec(self, feedback_fn):
4006
    """Gather and return data"""
4007
    result = {}
4008
    for instance in self.wanted_instances:
4009
      remote_info = rpc.call_instance_info(instance.primary_node,
4010
                                           instance.name)
4011
      if remote_info and "state" in remote_info:
4012
        remote_state = "up"
4013
      else:
4014
        remote_state = "down"
4015
      if instance.status == "down":
4016
        config_state = "down"
4017
      else:
4018
        config_state = "up"
4019

    
4020
      disks = [self._ComputeDiskStatus(instance, None, device)
4021
               for device in instance.disks]
4022

    
4023
      idict = {
4024
        "name": instance.name,
4025
        "config_state": config_state,
4026
        "run_state": remote_state,
4027
        "pnode": instance.primary_node,
4028
        "snodes": instance.secondary_nodes,
4029
        "os": instance.os,
4030
        "memory": instance.memory,
4031
        "nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
4032
        "disks": disks,
4033
        "vcpus": instance.vcpus,
4034
        }
4035

    
4036
      htkind = self.sstore.GetHypervisorType()
4037
      if htkind == constants.HT_XEN_PVM30:
4038
        idict["kernel_path"] = instance.kernel_path
4039
        idict["initrd_path"] = instance.initrd_path
4040

    
4041
      if htkind == constants.HT_XEN_HVM31:
4042
        idict["hvm_boot_order"] = instance.hvm_boot_order
4043
        idict["hvm_acpi"] = instance.hvm_acpi
4044
        idict["hvm_pae"] = instance.hvm_pae
4045
        idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
4046

    
4047
      if htkind in constants.HTS_REQ_PORT:
4048
        idict["vnc_bind_address"] = instance.vnc_bind_address
4049
        idict["network_port"] = instance.network_port
4050

    
4051
      result[instance.name] = idict
4052

    
4053
    return result
4054

    
4055

    
4056
class LUSetInstanceParams(LogicalUnit):
4057
  """Modifies an instances's parameters.
4058

4059
  """
4060
  HPATH = "instance-modify"
4061
  HTYPE = constants.HTYPE_INSTANCE
4062
  _OP_REQP = ["instance_name"]
4063

    
4064
  def BuildHooksEnv(self):
4065
    """Build hooks env.
4066

4067
    This runs on the master, primary and secondaries.
4068

4069
    """
4070
    args = dict()
4071
    if self.mem:
4072
      args['memory'] = self.mem
4073
    if self.vcpus:
4074
      args['vcpus'] = self.vcpus
4075
    if self.do_ip or self.do_bridge or self.mac:
4076
      if self.do_ip:
4077
        ip = self.ip
4078
      else:
4079
        ip = self.instance.nics[0].ip
4080
      if self.bridge:
4081
        bridge = self.bridge
4082
      else:
4083
        bridge = self.instance.nics[0].bridge
4084
      if self.mac:
4085
        mac = self.mac
4086
      else:
4087
        mac = self.instance.nics[0].mac
4088
      args['nics'] = [(ip, bridge, mac)]
4089
    env = _BuildInstanceHookEnvByObject(self.instance, override=args)
4090
    nl = [self.sstore.GetMasterNode(),
4091
          self.instance.primary_node] + list(self.instance.secondary_nodes)
4092
    return env, nl, nl
4093

    
4094
  def CheckPrereq(self):
4095
    """Check prerequisites.
4096

4097
    This only checks the instance list against the existing names.
4098

4099
    """
4100
    self.mem = getattr(self.op, "mem", None)
4101
    self.vcpus = getattr(self.op, "vcpus", None)
4102
    self.ip = getattr(self.op, "ip", None)
4103
    self.mac = getattr(self.op, "mac", None)
4104
    self.bridge = getattr(self.op, "bridge", None)
4105
    self.kernel_path = getattr(self.op, "kernel_path", None)
4106
    self.initrd_path = getattr(self.op, "initrd_path", None)
4107
    self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
4108
    self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
4109
    self.hvm_pae = getattr(self.op, "hvm_pae", None)
4110
    self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
4111
    self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
4112
    all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
4113
                 self.kernel_path, self.initrd_path, self.hvm_boot_order,
4114
                 self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
4115
                 self.vnc_bind_address]
4116
    if all_parms.count(None) == len(all_parms):
4117
      raise errors.OpPrereqError("No changes submitted")
4118
    if self.mem is not None:
4119
      try:
4120
        self.mem = int(self.mem)
4121
      except ValueError, err:
4122
        raise errors.OpPrereqError("Invalid memory size: %s" % str(err))
4123
    if self.vcpus is not None:
4124
      try:
4125
        self.vcpus = int(self.vcpus)
4126
      except ValueError, err:
4127
        raise errors.OpPrereqError("Invalid vcpus number: %s" % str(err))
4128
    if self.ip is not None:
4129
      self.do_ip = True
4130
      if self.ip.lower() == "none":
4131
        self.ip = None
4132
      else:
4133
        if not utils.IsValidIP(self.ip):
4134
          raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
4135
    else:
4136
      self.do_ip = False
4137
    self.do_bridge = (self.bridge is not None)
4138
    if self.mac is not None:
4139
      if self.cfg.IsMacInUse(self.mac):
4140
        raise errors.OpPrereqError('MAC address %s already in use in cluster' %
4141
                                   self.mac)
4142
      if not utils.IsValidMac(self.mac):
4143
        raise errors.OpPrereqError('Invalid MAC address %s' % self.mac)
4144

    
4145
    if self.kernel_path is not None:
4146
      self.do_kernel_path = True
4147
      if self.kernel_path == constants.VALUE_NONE:
4148
        raise errors.OpPrereqError("Can't set instance to no kernel")
4149

    
4150
      if self.kernel_path != constants.VALUE_DEFAULT:
4151
        if not os.path.isabs(self.kernel_path):
4152
          raise errors.OpPrereqError("The kernel path must be an absolute"
4153
                                    " filename")
4154
    else:
4155
      self.do_kernel_path = False
4156

    
4157
    if self.initrd_path is not None:
4158
      self.do_initrd_path = True
4159
      if self.initrd_path not in (constants.VALUE_NONE,
4160
                                  constants.VALUE_DEFAULT):
4161
        if not os.path.isabs(self.initrd_path):
4162
          raise errors.OpPrereqError("The initrd path must be an absolute"
4163
                                    " filename")
4164
    else:
4165
      self.do_initrd_path = False
4166

    
4167
    # boot order verification
4168
    if self.hvm_boot_order is not None:
4169
      if self.hvm_boot_order != constants.VALUE_DEFAULT:
4170
        if len(self.hvm_boot_order.strip("acdn")) != 0:
4171
          raise errors.OpPrereqError("invalid boot order specified,"
4172
                                     " must be one or more of [acdn]"
4173
                                     " or 'default'")
4174

    
4175
    # hvm_cdrom_image_path verification
4176
    if self.op.hvm_cdrom_image_path is not None:
4177
      if not os.path.isabs(self.op.hvm_cdrom_image_path):
4178
        raise errors.OpPrereqError("The path to the HVM CDROM image must"
4179
                                   " be an absolute path or None, not %s" %
4180
                                   self.op.hvm_cdrom_image_path)
4181
      if not os.path.isfile(self.op.hvm_cdrom_image_path):
4182
        raise errors.OpPrereqError("The HVM CDROM image must either be a"
4183
                                   " regular file or a symlink pointing to"
4184
                                   " an existing regular file, not %s" %
4185
                                   self.op.hvm_cdrom_image_path)
4186

    
4187
    # vnc_bind_address verification
4188
    if self.op.vnc_bind_address is not None:
4189
      if not utils.IsValidIP(self.op.vnc_bind_address):
4190
        raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
4191
                                   " like a valid IP address" %
4192
                                   self.op.vnc_bind_address)
4193

    
4194
    instance = self.cfg.GetInstanceInfo(
4195
      self.cfg.ExpandInstanceName(self.op.instance_name))
4196
    if instance is None:
4197
      raise errors.OpPrereqError("No such instance name '%s'" %
4198
                                 self.op.instance_name)
4199
    self.op.instance_name = instance.name
4200
    self.instance = instance
4201
    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    result = []
    instance = self.instance
    if self.mem:
      instance.memory = self.mem
      result.append(("mem", self.mem))
    if self.vcpus:
      instance.vcpus = self.vcpus
      result.append(("vcpus", self.vcpus))
    if self.do_ip:
      instance.nics[0].ip = self.ip
      result.append(("ip", self.ip))
    if self.bridge:
      instance.nics[0].bridge = self.bridge
      result.append(("bridge", self.bridge))
    if self.mac:
      instance.nics[0].mac = self.mac
      result.append(("mac", self.mac))
    if self.do_kernel_path:
      instance.kernel_path = self.kernel_path
      result.append(("kernel_path", self.kernel_path))
    if self.do_initrd_path:
      instance.initrd_path = self.initrd_path
      result.append(("initrd_path", self.initrd_path))
    if self.hvm_boot_order:
      if self.hvm_boot_order == constants.VALUE_DEFAULT:
        instance.hvm_boot_order = None
      else:
        instance.hvm_boot_order = self.hvm_boot_order
      result.append(("hvm_boot_order", self.hvm_boot_order))
    if self.hvm_acpi:
      instance.hvm_acpi = self.hvm_acpi
      result.append(("hvm_acpi", self.hvm_acpi))
    if self.hvm_pae:
      instance.hvm_pae = self.hvm_pae
      result.append(("hvm_pae", self.hvm_pae))
    if self.hvm_cdrom_image_path:
      instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
      result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
    if self.vnc_bind_address:
      instance.vnc_bind_address = self.vnc_bind_address
      result.append(("vnc_bind_address", self.vnc_bind_address))

    self.cfg.AddInstance(instance)

    return result
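    # For illustration only (hypothetical values): the returned change list
    # has one (parameter, new value) pair per modified field, e.g.
    #   [("mem", 512), ("vcpus", 2), ("bridge", "xen-br0")]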


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check that the nodelist contains only existing nodes.

    """
    self.nodes = _GetWantedNodes(self, getattr(self.op, "nodes", None))

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    Returns:
      a dictionary with the structure node->(export-list)
      where export-list is a list of the instances exported on
      that node.

    """
    return rpc.call_export_list(self.nodes)
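    # Illustrative result shape (node and instance names are hypothetical):
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": []}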


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self.instance))
    nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not found" %
                                 self.op.instance_name)

    # node verification
    dst_node_short = self.cfg.ExpandNodeName(self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(dst_node_short)

    if self.dst_node is None:
      raise errors.OpPrereqError("Destination node '%s' is unknown." %
                                 self.op.target_node)
    self.op.target_node = self.dst_node.name

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      if not rpc.call_instance_shutdown(src_node, instance):
        raise errors.OpExecError("Could not shutdown instance %s on node %s" %
                                 (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    try:
      for disk in instance.disks:
        if disk.iv_name == "sda":
          # new_dev_name will be a snapshot of an lvm leaf of the one we passed
          new_dev_name = rpc.call_blockdev_snapshot(src_node, disk)

          if not new_dev_name:
            logger.Error("could not snapshot block device %s on node %s" %
                         (disk.logical_id[1], src_node))
          else:
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=(vgname, new_dev_name),
                                   physical_id=(vgname, new_dev_name),
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.status == "up":
        if not rpc.call_instance_start(src_node, instance, None):
          _ShutdownInstanceDisks(instance, self.cfg)
          raise errors.OpExecError("Could not start instance")

    # TODO: check for size

    for dev in snap_disks:
      if not rpc.call_snapshot_export(src_node, dev, dst_node.name, instance):
        logger.Error("could not export block device %s from node %s to node %s"
                     % (dev.logical_id[1], src_node, dst_node.name))
      if not rpc.call_blockdev_remove(src_node, dev):
        logger.Error("could not remove snapshot block device %s from node %s" %
                     (dev.logical_id[1], src_node))

    if not rpc.call_finalize_export(dst_node.name, instance, snap_disks):
      logger.Error("could not finalize export for instance %s on node %s" %
                   (instance.name, dst_node.name))

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    if nodelist:
      op = opcodes.OpQueryExports(nodes=nodelist)
      exportlist = self.proc.ChainOpCode(op)
      for node in exportlist:
        if instance.name in exportlist[node]:
          if not rpc.call_export_remove(node, instance.name):
            logger.Error("could not remove older export for instance %s"
                         " on node %s" % (instance.name, node))


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    op = opcodes.OpQueryExports(nodes=[])
    exportlist = self.proc.ChainOpCode(op)
    found = False
    for node in exportlist:
      if instance_name in exportlist[node]:
        found = True
        if not rpc.call_export_remove(node, instance_name):
          logger.Error("could not remove export for instance %s"
                       " on node %s" % (instance_name, node))

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.target = self.cfg.GetNodeInfo(name)
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.target = self.cfg.GetInstanceInfo(name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return self.target.GetTags()


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs whose tag matches the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = [cfg.GetInstanceInfo(name) for name in cfg.GetInstanceList()]
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = [cfg.GetNodeInfo(name) for name in cfg.GetNodeList()]
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
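    # Illustrative result (hypothetical names and tags): searching for "^web"
    # could return
    #   [("/cluster", "webfarm"), ("/instances/inst1.example.com", "webserver")]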


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have a good list of nodes and/or the duration
    is valid.

    """

    if self.op.on_nodes:
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      if not result:
        raise errors.OpExecError("Complete failure from rpc call")
      for node, node_result in result.items():
        if not node_result:
          raise errors.OpExecError("Failure during rpc call to node %s,"
                                   " result: %s" % (node, node_result))


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg/sstore that are needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, sstore, mode, name, **kwargs):
    self.cfg = cfg
    self.sstore = sstore
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    # cluster data
    data = {
      "version": 1,
      "cluster_name": self.sstore.GetClusterName(),
      "cluster_tags": list(cfg.GetClusterInfo().GetTags()),
      "hypervisor_type": self.sstore.GetHypervisorType(),
      # we don't have job IDs
      }

    i_list = [cfg.GetInstanceInfo(iname) for iname in cfg.GetInstanceList()]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()
    node_data = rpc.call_node_info(node_list, cfg.GetVGName())
    for nname in node_list:
      ninfo = cfg.GetNodeInfo(nname)
      if nname not in node_data or not isinstance(node_data[nname], dict):
        raise errors.OpExecError("Can't get data for node %s" % nname)
      remote_info = node_data[nname]
      for attr in ['memory_total', 'memory_free', 'memory_dom0',
                   'vg_size', 'vg_free', 'cpu_total']:
        if attr not in remote_info:
          raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
                                   (nname, attr))
        try:
          remote_info[attr] = int(remote_info[attr])
        except ValueError, err:
          raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
                                   " %s" % (nname, attr, str(err)))
      # compute memory used by primary instances
      i_p_mem = i_p_up_mem = 0
      for iinfo in i_list:
        if iinfo.primary_node == nname:
          i_p_mem += iinfo.memory
          if iinfo.status == "up":
            i_p_up_mem += iinfo.memory

      # compute memory used by instances
      pnr = {
        "tags": list(ninfo.GetTags()),
        "total_memory": remote_info['memory_total'],
        "reserved_memory": remote_info['memory_dom0'],
        "free_memory": remote_info['memory_free'],
        "i_pri_memory": i_p_mem,
        "i_pri_up_memory": i_p_up_mem,
        "total_disk": remote_info['vg_size'],
        "free_disk": remote_info['vg_free'],
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "total_cpus": remote_info['cpu_total'],
        }
      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo in i_list:
      nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
                  for n in iinfo.nics]
      pir = {
        "tags": list(iinfo.GetTags()),
        "should_run": iinfo.status == "up",
        "vcpus": iinfo.vcpus,
        "memory": iinfo.memory,
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        }
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
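    # Sketch of the per-node and per-instance entries built above, with
    # hypothetical names and values:
    #   data["nodes"]["node1.example.com"] = {
    #     "tags": [], "total_memory": 4096, "reserved_memory": 512,
    #     "free_memory": 1024, "i_pri_memory": 2048, "i_pri_up_memory": 2048,
    #     "total_disk": 102400, "free_disk": 51200,
    #     "primary_ip": "192.0.2.10", "secondary_ip": "198.51.100.10",
    #     "total_cpus": 4}
    #   data["instances"]["inst1.example.com"] = {
    #     "tags": [], "should_run": True, "vcpus": 1, "memory": 512,
    #     "os": "debootstrap", "nodes": ["node1.example.com"],
    #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
    #               "bridge": "xen-br0"}],
    #     "disks": [{"size": 10240, "mode": "w"}], "disk_template": "plain"}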

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data
    if len(self.disks) != 2:
      raise errors.OpExecError("Only two-disk configurations supported")

    disk_space = _ComputeDiskSize(self.disk_template,
                                  self.disks[0]["size"], self.disks[1]["size"])

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request
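    # Illustrative "request" entry for an allocation, with hypothetical
    # values (disk_space_total comes from _ComputeDiskSize):
    #   {"type": "allocate", "name": "inst2.example.com",
    #    "disk_template": "plain", "tags": [], "os": "debootstrap",
    #    "vcpus": 1, "memory": 512,
    #    "disks": [{"size": 10240, "mode": "w"}, {"size": 4096, "mode": "w"}],
    #    "disk_space_total": 14336,
    #    "nics": [{"mac": "auto", "ip": None, "bridge": "xen-br0"}],
    #    "required_nodes": 1}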

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1

    disk_space = _ComputeDiskSize(instance.disk_template,
                                  instance.disks[0].size,
                                  instance.disks[1].size)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)
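    # The serialized document handed to the external allocator therefore has
    # this overall shape (keys only, values elided):
    #   {"version": 1, "cluster_name": ..., "cluster_tags": [...],
    #    "hypervisor_type": ..., "nodes": {...}, "instances": {...},
    #    "request": {...}}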

  def Run(self, name, validate=True, call_fn=rpc.call_iallocator_runner):
    """Run an instance allocator and return the results.

    """
    data = self.in_text

    result = call_fn(self.sstore.GetMasterNode(), name, self.in_text)

    if not isinstance(result, tuple) or len(result) != 4:
      raise errors.OpExecError("Invalid result from master iallocator runner")

    rcode, stdout, stderr, fail = result

    if rcode == constants.IARUN_NOTFOUND:
      raise errors.OpExecError("Can't find allocator '%s'" % name)
    elif rcode == constants.IARUN_FAILURE:
      raise errors.OpExecError("Instance allocator call failed: %s,"
                               " output: %s" %
                               (fail, stdout+stderr))
    self.out_text = stdout
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict
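    # Sketch of a well-formed result dict after parsing, with hypothetical
    # node names:
    #   {"success": True,
    #    "info": "allocation successful",
    #    "nodes": ["node2.example.com", "node3.example.com"]}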


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      if len(self.op.disks) != 2:
        raise errors.OpPrereqError("Only two-disk configurations supported")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.sstore,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       )
    else:
      ial = IAllocator(self.cfg, self.sstore,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result